upgrade zstd to 1.4.4

This commit is contained in:
Thomas Waldmann 2020-03-07 19:59:35 +01:00
parent 2de1583ca1
commit 5c50c220d6
73 changed files with 13788 additions and 6816 deletions

View file

@ -12,6 +12,7 @@ import os
# zstd files, structure as seen in zstd project repository:
zstd_sources = [
'lib/common/debug.c',
'lib/common/entropy_common.c',
'lib/common/error_private.c',
'lib/common/fse_decompress.c',
@ -20,8 +21,11 @@ zstd_sources = [
'lib/common/xxhash.c',
'lib/common/zstd_common.c',
'lib/compress/fse_compress.c',
'lib/compress/hist.c',
'lib/compress/huf_compress.c',
'lib/compress/zstd_compress.c',
'lib/compress/zstd_compress_literals.c',
'lib/compress/zstd_compress_sequences.c',
'lib/compress/zstd_double_fast.c',
'lib/compress/zstd_fast.c',
'lib/compress/zstd_lazy.c',
@ -29,9 +33,12 @@ zstd_sources = [
'lib/compress/zstd_opt.c',
'lib/compress/zstdmt_compress.c',
'lib/decompress/huf_decompress.c',
'lib/decompress/zstd_ddict.c',
'lib/decompress/zstd_decompress.c',
'lib/decompress/zstd_decompress_block.c',
'lib/dictBuilder/cover.c',
'lib/dictBuilder/divsufsort.c',
'lib/dictBuilder/fastcover.c',
'lib/dictBuilder/zdict.c',
]

View file

@ -1,8 +1,7 @@
/* ******************************************************************
bitstream
Part of FSE library
header file (to include)
Copyright (C) 2013-2017, Yann Collet.
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -49,26 +48,17 @@ extern "C" {
* Dependencies
******************************************/
#include "mem.h" /* unaligned access routines */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
/*-*************************************
* Debug
***************************************/
#if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
# include <assert.h>
#else
# ifndef assert
# define assert(condition) ((void)0)
# endif
#endif
/*=========================================
* Target specific
=========================================*/
#if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
#elif defined(__ICCARM__)
# include <intrinsics.h>
#endif
#define STREAM_ACCUMULATOR_MIN_32 25
@ -83,8 +73,7 @@ extern "C" {
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
typedef struct
{
typedef struct {
size_t bitContainer;
unsigned bitPos;
char* startPtr;
@ -118,8 +107,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
/*-********************************************
* bitStream decoding API (read backward)
**********************************************/
typedef struct
{
typedef struct {
size_t bitContainer;
unsigned bitsConsumed;
const char* ptr;
@ -176,7 +164,9 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
@ -236,7 +226,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
}
/*! BIT_addBitsFast() :
* works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
* works only if `value` is _clean_,
* meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
size_t value, unsigned nbBits)
{
@ -253,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
assert(bitC->ptr <= bitC->endPtr);
bitC->bitPos &= 7;
bitC->bitContainer >>= nbBytes*8;
}
@ -269,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
{
size_t const nbBytes = bitC->bitPos >> 3;
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
assert(bitC->ptr <= bitC->endPtr);
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
bitC->ptr += nbBytes;
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@ -352,17 +344,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */
# if defined(__x86_64__)
if (sizeof(bitContainer)==8)
return _bextr_u64(bitContainer, start, nbBits);
else
# endif
return _bextr_u32(bitContainer, start, nbBits);
#else
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
assert(nbBits < BIT_MASK_SIZE);
return (bitContainer >> start) & BIT_mask[nbBits];
#endif
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
@ -379,9 +364,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
* @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
/* arbitrate between double-shift and shift+mask */
#if 1
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
* bitstream is likely corrupted, and result is undefined */
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
#else
/* this code path is slower on my os-x laptop */
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
#endif
@ -405,7 +394,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
@ -414,7 +403,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
/*! BIT_readBitsFast() :
* unsafe version; only works only if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
size_t const value = BIT_lookBitsFast(bitD, nbBits);
assert(nbBits >= 1);

View file

@ -15,13 +15,15 @@
* Compiler specifics
*********************************************************/
/* force inlining */
#if !defined(ZSTD_NO_INLINE)
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@ -29,9 +31,16 @@
# define FORCE_INLINE_ATTR
#endif
#else
#define INLINE_KEYWORD
#define FORCE_INLINE_ATTR
#endif
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to elimininate the constant
* parameters. They must be inlined for the compiler to eliminate the constant
* branches.
*/
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
@ -52,11 +61,18 @@
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
# define UNUSED_ATTR __attribute__((unused))
#else
# define UNUSED_ATTR
#endif
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
#else
# ifdef __GNUC__
# if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_NOINLINE static __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
@ -67,7 +83,7 @@
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
@ -77,9 +93,9 @@
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if (defined(__clang__) && __has_attribute(__target__)) \
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
@ -88,14 +104,46 @@
#endif
#endif
/* prefetch */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
#elif defined(__GNUC__)
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
#else
# define PREFETCH(ptr) /* disabled */
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
#define PREFETCH_AREA(p, s) { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
}
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__clang__) && defined(__GNUC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
# endif
#else
# define DONT_VECTORIZE
#endif
/* disable warnings */

View file

@ -36,7 +36,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f1d = 0;
U32 f7b = 0;
U32 f7c = 0;
#ifdef _MSC_VER
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
int reg[4];
__cpuid((int*)reg, 0);
{
@ -72,14 +72,13 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
: "a"(1));
}
if (n >= 7) {
__asm__(
"pushl %%ebx\n\t"
"cpuid\n\t"
"movl %%ebx, %%eax\n\r"
"movl %%ebx, %%eax\n\t"
"popl %%ebx"
: "=a"(f7b), "=c"(f7c)
: "a"(7), "c"(0)

View file

@ -0,0 +1,44 @@
/* ******************************************************************
debug
Part of FSE library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
****************************************************************** */
/*
* This module only hosts one global variable
* which can be used to dynamically influence the verbosity of traces,
* such as DEBUGLOG and RAWLOG
*/
#include "debug.h"
int g_debuglevel = DEBUGLEVEL;

View file

@ -0,0 +1,134 @@
/* ******************************************************************
debug
Part of FSE library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
****************************************************************** */
/*
* The purpose of this header is to enable debug functions.
* They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
* and DEBUG_STATIC_ASSERT() for compile-time.
*
* By default, DEBUGLEVEL==0, which means run-time debug is disabled.
*
* Level 1 enables assert() only.
* Starting level 2, traces can be generated and pushed to stderr.
* The higher the level, the more verbose the traces.
*
* It's possible to dynamically adjust level using variable g_debug_level,
* which is only declared if DEBUGLEVEL>=2,
* and is a global variable, not multi-thread protected (use with care)
*/
#ifndef DEBUG_H_12987983217
#define DEBUG_H_12987983217
#if defined (__cplusplus)
extern "C" {
#endif
/* static assert is triggered at compile time, leaving no runtime artefact.
* static assert only works with compile-time constants.
* Also, this variant can only be used inside a function. */
#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
/* DEBUGLEVEL is expected to be defined externally,
* typically through compiler command line.
* Value must be a number. */
#ifndef DEBUGLEVEL
# define DEBUGLEVEL 0
#endif
/* DEBUGFILE can be defined externally,
* typically through compiler command line.
* note : currently useless.
* Value must be stderr or stdout */
#ifndef DEBUGFILE
# define DEBUGFILE stderr
#endif
/* recommended values for DEBUGLEVEL :
* 0 : release mode, no debug, all run-time checks disabled
* 1 : enables assert() only, no display
* 2 : reserved, for currently active debug path
* 3 : events once per object lifetime (CCtx, CDict, etc.)
* 4 : events once per frame
* 5 : events once per block
* 6 : events once per sequence (verbose)
* 7+: events at every position (*very* verbose)
*
* It's generally inconvenient to output traces > 5.
* In which case, it's possible to selectively trigger high verbosity levels
* by modifying g_debug_level.
*/
#if (DEBUGLEVEL>=1)
# include <assert.h>
#else
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
# define assert(condition) ((void)0) /* disable assert (default) */
# endif
#endif
#if (DEBUGLEVEL>=2)
# include <stdio.h>
extern int g_debuglevel; /* the variable is only declared,
it actually lives in debug.c,
and is shared by the whole process.
It's not thread-safe.
It's useful when enabling very verbose levels
on selective conditions (such as position in src) */
# define RAWLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if (l<=g_debuglevel) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#endif
#if defined (__cplusplus)
}
#endif
#endif /* DEBUG_H_12987983217 */

View file

@ -72,7 +72,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
unsigned charnum = 0;
int previous0 = 0;
if (hbSize < 4) return ERROR(srcSize_wrong);
if (hbSize < 4) {
/* This function only works when hbSize >= 4 */
char buffer[4];
memset(buffer, 0, sizeof(buffer));
memcpy(buffer, headerBuffer, hbSize);
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
buffer, sizeof(buffer));
if (FSE_isError(countSize)) return countSize;
if (countSize > hbSize) return ERROR(corruption_detected);
return countSize;
} }
assert(hbSize >= 4);
/* init */
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
bitStream = MEM_readLE32(ip);
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@ -105,6 +119,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
while (charnum < n0) normalizedCounter[charnum++] = 0;
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;

View file

@ -14,6 +14,10 @@
const char* ERR_getErrorString(ERR_enum code)
{
#ifdef ZSTD_STRIP_ERROR_STRINGS
(void)code;
return "Error strings stripped";
#else
static const char* const notErrorCode = "Unspecified error code";
switch( code )
{
@ -39,10 +43,12 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(maxCode):
default: return notErrorCode;
}
#endif
}

View file

@ -72,6 +72,7 @@ extern "C" {
#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
/*-****************************************
* FSE simple functions
******************************************/
@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src,
******************************************/
/*!
FSE_compress() does the following:
1. count symbol occurrence from source[] into table count[]
1. count symbol occurrence from source[] into table count[] (see hist.h)
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
3. save normalized counters to memory buffer using writeNCount()
4. build encoding table 'CTable' from normalized counters
@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method.
/* *** COMPRESSION *** */
/*! FSE_count():
Provides the precise count of each byte within a table 'count'.
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
*maxSymbolValuePtr will be updated if detected smaller than initial value.
@return : the count of the most frequent symbol (which is not identified).
if return == srcSize, there is only one symbol.
Can also return an error code, which can be tested with FSE_isError(). */
FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/*! FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab
Compactly save 'normalizedCounter' into 'buffer'.
@return : size of the compressed table,
or an errorCode, which can be tested using FSE_isError(). */
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
const short* normalizedCounter,
unsigned maxSymbolValue, unsigned tableLog);
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested
@return : size read from 'rBuffer',
or an errorCode, which can be tested using FSE_isError().
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
@ -312,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
*******************************************/
/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7))
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste
/* *****************************************
* FSE advanced API
*******************************************/
/* FSE_count_wksp() :
* Same as FSE_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned
*/
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace);
/** FSE_countFast() :
* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
*/
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/* FSE_countFast_wksp() :
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` must be a table of minimum `1024` unsigned
*/
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
/*! FSE_count_simple() :
* Same as FSE_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
* FSE advanced API
***************************************** */
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */
@ -387,7 +358,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
FSE_repeat_check, /**< Can use the previous table but it must be checked */
FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */
FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} FSE_repeat;
/* *****************************************
@ -541,7 +512,7 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
const U32 tableLog = MEM_read16(ptr);
statePtr->value = (ptrdiff_t)1<<tableLog;
statePtr->stateTable = u16ptr+2;
statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
statePtr->stateLog = tableLog;
}
@ -560,7 +531,7 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3
}
}
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
{
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
const U16* const stateTable = (const U16*)(statePtr->stateTable);
@ -576,6 +547,39 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
}
/* FSE_getMaxNbBits() :
* Approximate maximum cost of a symbol, in bits.
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
{
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
}
/* FSE_bitCost() :
* Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
{
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
U32 const threshold = (minNbBits+1) << 16;
assert(tableLog < 16);
assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
{ U32 const tableSize = 1 << tableLog;
U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
U32 const bitMultiplier = 1 << accuracyLog;
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
assert(normalizedDeltaFromThreshold <= bitMultiplier);
return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
}
}
/* ====== Decompression ====== */
typedef struct {

View file

@ -49,10 +49,12 @@
* Error Management
****************************************************************/
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* check and forward error code */
#ifndef CHECK_F
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
#endif
/* **************************************************************

View file

@ -1,7 +1,7 @@
/* ******************************************************************
Huffman coder, part of New Generation Entropy library
header file
Copyright (C) 2013-2016, Yann Collet.
huff0 huffman codec,
part of Finite State Entropy library
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -163,25 +163,29 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
/* ****************************************
* Advanced decompression functions
******************************************/
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
/* ****************************************
@ -208,7 +212,7 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
@ -227,7 +231,9 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
*/
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
* Read compact Huffman tree, saved by HUF_writeCTable().
@ -242,10 +248,15 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
/*
* HUF_decompress() does the following:
* 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
* 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
* 2. build Huffman table from save, using HUF_readDTableX?()
* 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
*/
@ -253,13 +264,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/** HUF_selectDecoder() :
* Tells which decoder is likely to decode faster,
* based on a set of pre-computed metrics.
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
* @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
* Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
/**
* The minimum workspace size for the `workSpace` used in
* HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
* HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
*
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
* HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
@ -270,14 +281,22 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* ====================== */
@ -298,25 +317,37 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
#endif
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
#endif
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);

View file

@ -39,10 +39,87 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
/* detects whether we are being compiled under msan */
#if defined (__has_feature)
# if __has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
#endif
#if defined (MEMORY_SANITIZER)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
#include <stdint.h> /* intptr_t */
/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);
/* Make memory region fully uninitialized (without changing its contents).
This is a legacy interface that does not update origin information. Use
__msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif
/* detects whether we are being compiled under asan */
#if defined (__has_feature)
# if __has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
#elif defined(__SANITIZE_ADDRESS__)
# define ADDRESS_SANITIZER 1
#endif
#if defined (ADDRESS_SANITIZER)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
* We therefore declare the functions we need ourselves, rather than trying to
* include the header file... */
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
*
* This memory must be previously allocated by your program. Instrumented
* code is forbidden from accessing addresses in this region until it is
* unpoisoned. This function is not guaranteed to poison the entire region -
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
* alignment restrictions.
*
* \note This function is not thread-safe because no two threads can poison or
* unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
/**
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
*
* This memory must be previously allocated by your program. Accessing
* addresses in this region is allowed until this region is poisoned again.
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
* to ASan alignment restrictions.
*
* \note This function is not thread-safe because no two threads can
* poison or unpoison memory in the same memory region simultaneously.
*
* \param addr Start of memory region.
* \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
/*-**************************************************************
* Basic Types
@ -57,11 +134,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
typedef uint64_t U64;
typedef int64_t S64;
#else
# include <limits.h>
#if CHAR_BIT != 8
# error "this implementation requires char to be exactly 8-bit type"
#endif
typedef unsigned char BYTE;
#if USHRT_MAX != 65535
# error "this implementation requires short to be exactly 16-bit type"
#endif
typedef unsigned short U16;
typedef signed short S16;
#if UINT_MAX != 4294967295
# error "this implementation requires int to be exactly 32-bit type"
#endif
typedef unsigned int U32;
typedef signed int S32;
/* note : there are no limits defined for long long type in C90.
* limits exist in C99, however, in such case, <stdint.h> is preferred */
typedef unsigned long long U64;
typedef signed long long S64;
#endif
@ -86,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define MEM_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
# define MEM_FORCE_MEMORY_ACCESS 1
# endif
#endif
@ -186,7 +275,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_ulong(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#else
return ((in << 24) & 0xff000000 ) |
@ -200,7 +290,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_uint64(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |

View file

@ -10,9 +10,10 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "pool.h"
#include <stddef.h> /* size_t */
#include "debug.h" /* assert */
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
#include "pool.h"
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
@ -33,8 +34,9 @@ typedef struct POOL_job_s {
struct POOL_ctx_s {
ZSTD_customMem customMem;
/* Keep track of the threads */
ZSTD_pthread_t *threads;
size_t numThreads;
ZSTD_pthread_t* threads;
size_t threadCapacity;
size_t threadLimit;
/* The queue is a circular buffer */
POOL_job *queue;
@ -58,10 +60,10 @@ struct POOL_ctx_s {
};
/* POOL_thread() :
Work thread for the thread pool.
Waits for jobs and executes them.
@returns : NULL on failure else non-null.
*/
* Work thread for the thread pool.
* Waits for jobs and executes them.
* @returns : NULL on failure else non-null.
*/
static void* POOL_thread(void* opaque) {
POOL_ctx* const ctx = (POOL_ctx*)opaque;
if (!ctx) { return NULL; }
@ -69,50 +71,55 @@ static void* POOL_thread(void* opaque) {
/* Lock the mutex and wait for a non-empty queue or until shutdown */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while (ctx->queueEmpty && !ctx->shutdown) {
while ( ctx->queueEmpty
|| (ctx->numThreadsBusy >= ctx->threadLimit) ) {
if (ctx->shutdown) {
/* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
* a few threads will be shutdown while !queueEmpty,
* but enough threads will remain active to finish the queue */
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->queueEmpty) {
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
{ POOL_job const job = ctx->queue[ctx->queueHead];
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
ctx->numThreadsBusy++;
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
/* Unlock the mutex, signal a pusher, and run the job */
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
job.function(job.opaque);
/* If the intended queue size was 0, signal after finishing job */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
if (ctx->queueSize == 1) {
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
} }
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
} /* for (;;) */
/* Unreachable */
assert(0); /* Unreachable */
}
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
}
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ZSTD_customMem customMem) {
POOL_ctx* ctx;
/* Check the parameters */
/* Check parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate empty
* and full queues.
* It needs one extra space since one space is wasted to differentiate
* empty and full queues.
*/
ctx->queueSize = queueSize + 1;
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
@ -120,13 +127,17 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
ctx->queueTail = 0;
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
(void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
{
int error = 0;
error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
if (error) { POOL_free(ctx); return NULL; }
}
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
ctx->numThreads = 0;
ctx->threadCapacity = 0;
ctx->customMem = customMem;
/* Check for errors */
if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
@ -134,11 +145,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
ctx->numThreads = i;
ctx->threadCapacity = i;
POOL_free(ctx);
return NULL;
} }
ctx->numThreads = numThreads;
ctx->threadCapacity = numThreads;
ctx->threadLimit = numThreads;
}
return ctx;
}
@ -156,8 +168,8 @@ static void POOL_join(POOL_ctx* ctx) {
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->numThreads; ++i) {
ZSTD_pthread_join(ctx->threads[i], NULL);
for (i = 0; i < ctx->threadCapacity; ++i) {
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
} }
}
@ -172,24 +184,68 @@ void POOL_free(POOL_ctx *ctx) {
ZSTD_free(ctx, ctx->customMem);
}
size_t POOL_sizeof(POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx)
+ ctx->queueSize * sizeof(POOL_job)
+ ctx->numThreads * sizeof(ZSTD_pthread_t);
+ ctx->threadCapacity * sizeof(ZSTD_pthread_t);
}
/* @return : 0 on success, 1 on error */
static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
{
if (numThreads <= ctx->threadCapacity) {
if (!numThreads) return 1;
ctx->threadLimit = numThreads;
return 0;
}
/* numThreads > threadCapacity */
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
if (!threadPool) return 1;
/* replace existing thread pool */
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
ZSTD_free(ctx->threads, ctx->customMem);
ctx->threads = threadPool;
/* Initialize additional threads */
{ size_t threadId;
for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
ctx->threadCapacity = threadId;
return 1;
} }
} }
/* successfully expanded */
ctx->threadCapacity = numThreads;
ctx->threadLimit = numThreads;
return 0;
}
/* @return : 0 on success, 1 on error */
int POOL_resize(POOL_ctx* ctx, size_t numThreads)
{
int result;
if (ctx==NULL) return 1;
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
result = POOL_resize_internal(ctx, numThreads);
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
return result;
}
/**
* Returns 1 if the queue is full and 0 otherwise.
*
* If the queueSize is 1 (the pool was created with an intended queueSize of 0),
* then a queue is empty if there is a thread free and no job is waiting.
* When queueSize is 1 (pool was created with an intended queueSize of 0),
* then a queue is empty if there is a thread free _and_ no job is waiting.
*/
static int isQueueFull(POOL_ctx const* ctx) {
if (ctx->queueSize > 1) {
return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
} else {
return ctx->numThreadsBusy == ctx->numThreads ||
return (ctx->numThreadsBusy == ctx->threadLimit) ||
!ctx->queueEmpty;
}
}
@ -263,6 +319,11 @@ void POOL_free(POOL_ctx* ctx) {
(void)ctx;
}
int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
(void)ctx; (void)numThreads;
return 0;
}
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
(void)ctx;
function(opaque);

View file

@ -30,40 +30,50 @@ typedef struct POOL_ctx_s POOL_ctx;
*/
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
ZSTD_customMem customMem);
/*! POOL_free() :
Free a thread pool returned by POOL_create().
*/
* Free a thread pool returned by POOL_create().
*/
void POOL_free(POOL_ctx* ctx);
/*! POOL_resize() :
* Expands or shrinks pool's number of threads.
* This is more efficient than releasing + creating a new context,
* since it tries to preserve and re-use existing threads.
* `numThreads` must be at least 1.
* @return : 0 when resize was successful,
* !0 (typically 1) if there is an error.
* note : only numThreads can be resized, queueSize remains unchanged.
*/
int POOL_resize(POOL_ctx* ctx, size_t numThreads);
/*! POOL_sizeof() :
return memory usage of pool returned by POOL_create().
*/
* @return threadpool memory usage
* note : compatible with NULL (returns 0 in this case)
*/
size_t POOL_sizeof(POOL_ctx* ctx);
/*! POOL_function :
The function type that can be added to a thread pool.
*/
* The function type that can be added to a thread pool.
*/
typedef void (*POOL_function)(void*);
/*! POOL_add_function :
The function type for a generic thread pool add function.
*/
typedef void (*POOL_add_function)(void*, POOL_function, void*);
/*! POOL_add() :
Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
Possibly blocks until there is room in the queue.
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
* Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
* Possibly blocks until there is room in the queue.
* Note : The function may be executed asynchronously,
* therefore, `opaque` must live until function has been completed.
*/
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
/*! POOL_tryAdd() :
Add the job `function(opaque)` to the thread pool if a worker is available.
return immediately otherwise.
@return : 1 if successful, 0 if not.
*/
* Add the job `function(opaque)` to thread pool _if_ a worker is available.
* Returns immediately even if not (does not block).
* @return : 1 if successful, 0 if not.
*/
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);

View file

@ -14,8 +14,10 @@
* This file will hold wrapper for systems, which do not support pthreads
*/
/* create fake symbol to avoid empty trnaslation unit warning */
int g_ZSTD_threading_useles_symbol;
#include "threading.h"
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useless_symbol;
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
@ -28,7 +30,6 @@ int g_ZSTD_threading_useles_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
#include "threading.h"
/* === Implementation === */
@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}
#endif /* ZSTD_MULTITHREAD */
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
#include <stdlib.h>
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
{
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
if (!*mutex)
return 1;
return pthread_mutex_init(*mutex, attr);
}
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
{
if (!*mutex)
return 0;
{
int const ret = pthread_mutex_destroy(*mutex);
free(*mutex);
return ret;
}
}
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
{
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
if (!*cond)
return 1;
return pthread_cond_init(*cond, attr);
}
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
{
if (!*cond)
return 0;
{
int const ret = pthread_cond_destroy(*cond);
free(*cond);
return ret;
}
}
#endif

View file

@ -13,6 +13,8 @@
#ifndef THREADING_H_938743
#define THREADING_H_938743
#include "debug.h"
#if defined (__cplusplus)
extern "C" {
#endif
@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
*/
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
#if DEBUGLEVEL < 1
#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#else /* DEBUGLEVEL >= 1 */
/* Debug implementation of threading.
* In this implementation we use pointers for mutexes and condition variables.
* This way, if we forget to init/destroy them the program will crash or ASAN
* will report leaks.
*/
#define ZSTD_pthread_mutex_t pthread_mutex_t*
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
#define ZSTD_pthread_cond_t pthread_cond_t*
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#endif
#else /* ZSTD_MULTITHREAD not defined */
/* No multithreading support */

View file

@ -53,7 +53,8 @@
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
defined(__ICCARM__)
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
@ -66,10 +67,10 @@
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
/*!XXH_FORCE_NATIVE_FORMAT :
* By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
* Results are therefore identical for little-endian and big-endian CPU.
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
* Should endian-independance be of no importance for your application, you may set the #define below to 1,
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
* to improve speed for Big-endian CPU.
* This option has no impact on Little_Endian CPU.
*/
@ -98,6 +99,7 @@
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
#include <stddef.h> /* size_t */
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/* for memcpy() */
@ -119,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
# define INLINE_KEYWORD
#endif
#if defined(__GNUC__)
#if defined(__GNUC__) || defined(__ICCARM__)
# define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
# define FORCE_INLINE_ATTR __forceinline
@ -205,7 +207,12 @@ static U64 XXH_read64(const void* memPtr)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
#if defined(__ICCARM__)
# include <intrinsics.h>
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif

View file

@ -30,8 +30,10 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
/*-****************************************
* ZSTD Error Management
******************************************/
#undef ZSTD_isError /* defined within zstd_internal.h */
/*! ZSTD_isError() :
* tells if a return value is an error code */
* tells if a return value is an error code
* symbol is required for external callers */
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
/*! ZSTD_getErrorName() :
@ -46,11 +48,6 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
* provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*! g_debuglog_enable :
* turn on/off debug traces (global switch) */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
int g_debuglog_enable = 1;
#endif
/*=**************************************************************

View file

@ -72,6 +72,7 @@ typedef enum {
ZSTD_error_workSpace_tooSmall= 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74,
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,

View file

@ -21,6 +21,7 @@
***************************************/
#include "compiler.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
@ -33,48 +34,15 @@
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Debug
***************************************/
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
# include <assert.h>
#else
# ifndef assert
# define assert(condition) ((void)0)
# endif
#endif
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
# include <stdio.h>
extern int g_debuglog_enable;
/* recommended values for ZSTD_DEBUG display levels :
* 1 : no display, enables assert() only
* 2 : reserved for currently active debug path
* 3 : events once per object lifetime (CCtx, CDict, etc.)
* 4 : events once per frame
* 5 : events once per block
* 6 : events once per sequence (*very* verbose) */
# define RAWLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
fprintf(stderr, __VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#endif
/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError /* for inlining */
#define FSE_isError ERR_isError
#define HUF_isError ERR_isError
/*-*************************************
@ -84,8 +52,50 @@ extern int g_debuglog_enable;
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}
/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);
/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);
/*-*************************************
@ -109,12 +119,10 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
#define BIT0 1
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_FRAMEIDSIZE 4
static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
@ -184,29 +192,59 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
typedef enum {
ZSTD_no_overlap,
ZSTD_overlap_src_before_dst
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
* @param ovtype controls the overlap detection
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
do
COPY8(op, ip)
while (op < oend);
}
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
{
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = (BYTE*)dstEnd;
do
COPY8(op, ip)
while (op < oend);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
/* Handle short offset copies. */
do {
COPY8(op, ip)
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
/* Separate out the first two COPY16() calls because the copy length is
* almost certain to be short, so the branches have different
* probabilities.
* On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
* On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
*/
COPY16(op, ip);
COPY16(op, ip);
if (op >= oend) return;
do {
COPY16(op, ip);
COPY16(op, ip);
}
while (op < oend);
}
}
@ -227,10 +265,23 @@ typedef struct {
BYTE* llCode;
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
} seqStore_t;
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
* similarly, before using `decompressedBound`, check for errors using:
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
*/
typedef struct {
size_t compressedSize;
unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
@ -249,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -275,7 +328,7 @@ typedef struct {
blockType_e blockType;
U32 lastBlock;
U32 origSize;
} blockProperties_t;
} blockProperties_t; /* declared here for decompress and fullbench */
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
@ -283,6 +336,13 @@ typedef struct {
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
blockProperties_t* bpPtr);
/*! ZSTD_decodeSeqHeaders() :
* decode sequence header from src */
/* Used by: decompress, fullbench (does not get its definition from here) */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);
#if defined (__cplusplus)
}
#endif

View file

@ -1,6 +1,6 @@
/* ******************************************************************
FSE : Finite State Entropy encoder
Copyright (C) 2013-2015, Yann Collet.
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -37,9 +37,11 @@
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#include "compiler.h"
#include "mem.h" /* U32, U16, etc. */
#include "debug.h" /* assert, DEBUGLOG */
#include "hist.h" /* HIST_count_wksp */
#include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
@ -49,7 +51,6 @@
* Error Management
****************************************************************/
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
/* **************************************************************
@ -82,7 +83,9 @@
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
*/
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize)
{
U32 const tableSize = 1 << tableLog;
U32 const tableMask = tableSize - 1;
@ -100,14 +103,19 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
tableU16[-2] = (U16) tableLog;
tableU16[-1] = (U16) maxSymbolValue;
assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
#endif
/* symbol start positions */
{ U32 u;
cumul[0] = 0;
for (u=1; u<=maxSymbolValue+1; u++) {
for (u=1; u <= maxSymbolValue+1; u++) {
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
cumul[u] = cumul[u-1] + 1;
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
@ -121,14 +129,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
{ U32 position = 0;
U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
int nbOccurences;
for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
int nbOccurrences;
int const freq = normalizedCounter[symbol];
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
position = (position + step) & tableMask;
while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
while (position > highThreshold)
position = (position + step) & tableMask; /* Low proba area */
} }
if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
assert(position==0); /* Must have initialized all positions */
}
/* Build table */
@ -143,7 +153,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
for (s=0; s<=maxSymbolValue; s++) {
switch (normalizedCounter[s])
{
case 0: break;
case 0:
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
break;
case -1:
case 1:
@ -160,6 +173,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
total += normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
DEBUGLOG(5, "\n --- table statistics : ");
{ U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
}
}
#endif
return 0;
}
@ -174,8 +199,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
#ifndef FSE_COMMONDEFS_ONLY
/*-**************************************************************
* FSE NCount encoding-decoding
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
@ -183,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
}
static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
unsigned writeIsSafe)
static size_t
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
unsigned writeIsSafe)
{
BYTE* const ostart = (BYTE*) header;
BYTE* out = ostart;
@ -194,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
const int tableSize = 1 << tableLog;
int remaining;
int threshold;
U32 bitStream;
int bitCount;
unsigned charnum = 0;
int previous0 = 0;
U32 bitStream = 0;
int bitCount = 0;
unsigned symbol = 0;
unsigned const alphabetSize = maxSymbolValue + 1;
int previousIs0 = 0;
bitStream = 0;
bitCount = 0;
/* Table Size */
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
bitCount += 4;
@ -210,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
threshold = tableSize;
nbBits = tableLog+1;
while (remaining>1) { /* stops at 1 */
if (previous0) {
unsigned start = charnum;
while (!normalizedCounter[charnum]) charnum++;
while (charnum >= start+24) {
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
if (previousIs0) {
unsigned start = symbol;
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
if (symbol == alphabetSize) break; /* incorrect distribution */
while (symbol >= start+24) {
start+=24;
bitStream += 0xFFFFU << bitCount;
if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
if ((!writeIsSafe) && (out > oend-2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE) bitStream;
out[1] = (BYTE)(bitStream>>8);
out+=2;
bitStream>>=16;
}
while (charnum >= start+3) {
while (symbol >= start+3) {
start+=3;
bitStream += 3 << bitCount;
bitCount += 2;
}
bitStream += (charnum-start) << bitCount;
bitStream += (symbol-start) << bitCount;
bitCount += 2;
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
bitStream >>= 16;
bitCount -= 16;
} }
{ int count = normalizedCounter[charnum++];
int const max = (2*threshold-1)-remaining;
{ int count = normalizedCounter[symbol++];
int const max = (2*threshold-1) - remaining;
remaining -= count < 0 ? -count : count;
count++; /* +1 for extra accuracy */
if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
if (count>=threshold)
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream += count << bitCount;
bitCount += nbBits;
bitCount -= (count<max);
previous0 = (count==1);
previousIs0 = (count==1);
if (remaining<1) return ERROR(GENERIC);
while (remaining<threshold) { nbBits--; threshold>>=1; }
}
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out += 2;
@ -259,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
bitCount -= 16;
} }
if (remaining != 1)
return ERROR(GENERIC); /* incorrect normalized distribution */
assert(symbol <= alphabetSize);
/* flush remaining bitStream */
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
if ((!writeIsSafe) && (out > oend - 2))
return ERROR(dstSize_tooSmall); /* Buffer overflow */
out[0] = (BYTE)bitStream;
out[1] = (BYTE)(bitStream>>8);
out+= (bitCount+7) /8;
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
return (out-ostart);
}
size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
@ -279,179 +314,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
}
/*-**************************************************************
* Counting histogram
****************************************************************/
/*! FSE_count_simple
This function counts byte values within `src`, and store the histogram into table `count`.
It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements.
@return : count of most numerous element.
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
assert(*ip <= maxSymbolValue);
count[*ip++]++;
}
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
return (size_t)max;
}
/* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
* @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned checkMax, unsigned* const workSpace)
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
while (ip < iend-15) {
U32 c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
}
ip-=4;
}
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (checkMax) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
return (size_t)max;
}
/* FSE_countFast_wksp() :
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned* workSpace)
{
if (sourceSize < 1500) /* heuristic threshold */
return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[1024];
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
}
/* FSE_count_wksp() :
* Same as FSE_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace)
{
if (*maxSymbolValuePtr < 255)
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
*maxSymbolValuePtr = 255;
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
}
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[1024];
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
}
/*-**************************************************************
* FSE Compression Code
****************************************************************/
/*! FSE_sizeof_CTable() :
FSE_CTable is a variable size structure which contains :
`U16 tableLog;`
`U16 maxSymbolValue;`
`U16 nextStateNumber[1 << tableLog];` // This size is variable
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
Allocation is manual (C standard does not support variable-size structures).
*/
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
{
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
}
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
@ -466,7 +335,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -529,6 +398,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
}
ToDistribute = (1 << tableLog) - distributed;
if (ToDistribute == 0)
return 0;
if ((total / ToDistribute) > lowOne) {
/* risk of rounding to zero */
lowOne = (U32)((total * 3) / (ToDistribute * 2));
@ -629,11 +501,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
U32 s;
U32 nTotal = 0;
for (s=0; s<=maxSymbolValue; s++)
printf("%3i: %4i \n", s, normalizedCounter[s]);
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
for (s=0; s<=maxSymbolValue; s++)
nTotal += abs(normalizedCounter[s]);
if (nTotal != (1U<<tableLog))
printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
getchar();
}
#endif
@ -786,7 +658,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
U32 count[FSE_MAX_SYMBOL_VALUE+1];
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
FSE_CTable* CTable = (FSE_CTable*)workSpace;
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
@ -800,7 +672,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@ -835,7 +707,7 @@ typedef struct {
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}

View file

@ -0,0 +1,203 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* --- dependencies --- */
#include "mem.h" /* U32, BYTE, etc. */
#include "debug.h" /* assert, DEBUGLOG */
#include "error_private.h" /* ERROR */
#include "hist.h"
/* --- Error management --- */
unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned largestCount=0;
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
assert(*ip <= maxSymbolValue);
count[*ip++]++;
}
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ U32 s;
for (s=0; s<=maxSymbolValue; s++)
if (count[s] > largestCount) largestCount = count[s];
}
return largestCount;
}
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
/* HIST_count_parallel_wksp() :
* store histogram into 4 intermediate tables, recombined at the end.
* this design makes better use of OoO cpus,
* and is noticeably faster when some values are heavily repeated.
* But it needs some additional workspace for intermediate tables.
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
* @return : largest histogram frequency,
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t HIST_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
HIST_checkInput_e check,
U32* const workSpace)
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
while (ip < iend-15) {
U32 c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
}
ip-=4;
}
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (check) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
return (size_t)max;
}
/* HIST_countFast_wksp() :
* Same as HIST_countFast(), but using an externally provided scratch buffer.
* `workSpace` is a writable buffer which must be 4-bytes aligned,
* `workSpaceSize` must be >= HIST_WKSP_SIZE
*/
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
void* workSpace, size_t workSpaceSize)
{
if (sourceSize < 1500) /* heuristic threshold */
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}
/* HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
void* workSpace, size_t workSpaceSize)
{
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
if (*maxSymbolValuePtr < 255)
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
*maxSymbolValuePtr = 255;
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
}

View file

@ -0,0 +1,95 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* --- dependencies --- */
#include <stddef.h> /* size_t */
/* --- simple histogram functions --- */
/*! HIST_count():
* Provides the precise count of each byte within a table 'count'.
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
* Updates *maxSymbolValuePtr with actual largest symbol value detected.
* @return : count of the most frequent symbol (which isn't identified).
* or an error code, which can be tested using HIST_isError().
* note : if return == srcSize, there is only one symbol.
*/
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */
/* --- advanced histogram functions --- */
#define HIST_WKSP_SIZE_U32 1024
#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
/** HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* Benefit is this function will use very little stack space.
* `workSpace` is a writable buffer which must be 4-bytes aligned,
* `workSpaceSize` must be >= HIST_WKSP_SIZE
*/
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize,
void* workSpace, size_t workSpaceSize);
/** HIST_countFast() :
* same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
*/
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
/** HIST_countFast_wksp() :
* Same as HIST_countFast(), but using an externally provided scratch buffer.
* `workSpace` is a writable buffer which must be 4-bytes aligned,
* `workSpaceSize` must be >= HIST_WKSP_SIZE
*/
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize,
void* workSpace, size_t workSpaceSize);
/*! HIST_count_simple() :
* Same as HIST_countFast(), this function is unsafe,
* and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
* It is also a bit slower for large inputs.
* However, it does not need any additional memory (not even on stack).
* @return : count of the most frequent symbol.
* Note this function doesn't produce any error (i.e. it must succeed).
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);

View file

@ -45,8 +45,9 @@
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#include "compiler.h"
#include "bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
@ -58,7 +59,7 @@
* Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
@ -81,28 +82,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
*/
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const oend = ostart + dstSize;
U32 maxSymbolValue = HUF_TABLELOG_MAX;
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
U32 count[HUF_TABLELOG_MAX+1];
unsigned count[HUF_TABLELOG_MAX+1];
S16 norm[HUF_TABLELOG_MAX+1];
/* init conditions */
if (wtSize <= 1) return 0; /* Not compressible */
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
@ -133,7 +134,7 @@ struct HUF_CElt_s {
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
@ -168,7 +169,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
}
size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@ -216,6 +217,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src
return readSize;
}
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
const HUF_CElt* table = (const HUF_CElt*)symbolTable;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return table[symbolValue].nbBits;
}
typedef struct nodeElt_s {
U32 count;
@ -307,7 +315,7 @@ typedef struct {
U32 current;
} rankPos;
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
{
rankPos rank[32];
U32 n;
@ -339,7 +347,7 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
nodeElt* const huffNode0 = (nodeElt*)workSpace;
nodeElt* const huffNode = huffNode0+1;
@ -413,7 +421,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
* @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
huffNodeTable nodeTable;
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
@ -602,13 +610,14 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = singleStream ?
size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
@ -620,21 +629,21 @@ static size_t HUF_compressCTable_internal(
}
typedef struct {
U32 count[HUF_SYMBOLVALUE_MAX + 1];
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
huffNodeTable nodeTable;
} HUF_compress_tables_t;
/* HUF_compress_internal() :
* `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t HUF_compress_internal (
void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2)
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
BYTE* const ostart = (BYTE*)dst;
@ -643,7 +652,7 @@ static size_t HUF_compress_internal (
/* checks & inits */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
if (!srcSize) return 0; /* Uncompressed */
if (!dstSize) return 0; /* cannot fit anything within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@ -656,13 +665,13 @@ static size_t HUF_compress_internal (
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
singleStream, oldHufTable, bmi2);
nbStreams, oldHufTable, bmi2);
}
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
/* Check validity of previous table */
@ -675,14 +684,15 @@ static size_t HUF_compress_internal (
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
singleStream, oldHufTable, bmi2);
nbStreams, oldHufTable, bmi2);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
table->nodeTable, sizeof(table->nodeTable)) );
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog,
table->nodeTable, sizeof(table->nodeTable));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
memset(table->CTable + (maxSymbolValue + 1), 0,
@ -698,7 +708,7 @@ static size_t HUF_compress_internal (
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
singleStream, oldHufTable, bmi2);
nbStreams, oldHufTable, bmi2);
} }
/* Use the new huffman table */
@ -710,7 +720,7 @@ static size_t HUF_compress_internal (
}
return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize,
singleStream, table->CTable, bmi2);
nbStreams, table->CTable, bmi2);
}
@ -720,7 +730,7 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, 1 /*single stream*/,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/);
}
@ -732,7 +742,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, 1 /*single stream*/,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2);
}
@ -754,7 +764,7 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, 0 /*4 streams*/,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/);
}
@ -769,7 +779,7 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, 0 /* 4 streams */,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2);
}

File diff suppressed because it is too large Load diff

View file

@ -19,6 +19,7 @@
* Dependencies
***************************************/
#include "zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h"
#endif
@ -27,17 +28,19 @@
extern "C" {
#endif
/*-*************************************
* Constants
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionnally, candidate position 1 will be lost.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@ -53,14 +56,30 @@ typedef struct ZSTD_prefixDict_s {
} ZSTD_prefixDict;
typedef struct {
U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
void* dictBuffer;
void const* dict;
size_t dictSize;
ZSTD_dictContentType_e dictContentType;
ZSTD_CDict* cdict;
} ZSTD_localDict;
typedef struct {
U32 CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_repeat repeatMode;
} ZSTD_hufCTables_t;
typedef struct {
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
HUF_repeat hufCTable_repeatMode;
FSE_repeat offcode_repeatMode;
FSE_repeat matchlength_repeatMode;
FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;
typedef struct {
ZSTD_hufCTables_t huf;
ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
typedef struct {
@ -76,26 +95,28 @@ typedef struct {
U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
U32* litFreq; /* table of literals statistics, of size 256 */
U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
unsigned* litFreq; /* table of literals statistics, of size 256 */
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */
U32 matchLengthSum; /* nb of matchLength codes */
U32 offCodeSum; /* nb of offset codes */
/* begin updated by ZSTD_setLog2Prices */
U32 log2litSum; /* pow2 to compare log2(litfreq) to */
U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
/* end : updated by ZSTD_setLog2Prices */
U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
U32 litSumBasePrice; /* to compare to log2(litfreq) */
U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;
typedef struct {
@ -108,20 +129,28 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
* This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
* Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
* When dict referential is copied into active context (i.e. not attached),
* loadedDictEnd == dictSize, since referential starts from zero.
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
} ZSTD_matchState_t;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
typedef struct {
ZSTD_compressedBlockState_t* prevCBlock;
@ -147,7 +176,7 @@ typedef struct {
U32 hashLog; /* Log size of hashTable */
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
U32 hashRateLog; /* Log number of entries to skip */
U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
@ -161,23 +190,39 @@ typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The position where reading stopped. <= size. */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity of the `seq` pointer */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
typedef struct {
int collectSequences;
ZSTD_Sequence* seqStart;
size_t seqIndex;
size_t maxSequences;
} SeqCollector;
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
ZSTD_frameParameters fParams;
int compressionLevel;
int disableLiteralCompression;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
int srcSizeHint; /* User's best guess of source size.
* Hint is not valid when srcSizeHint == 0.
* There is no guarantee that hint is close to actual source size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
/* Multithreading: used to pass parameters to mtctx */
unsigned nbWorkers;
unsigned jobSize;
unsigned overlapSizeLog;
int nbWorkers;
size_t jobSize;
int overlapLog;
int rsyncable;
/* Long distance matching parameters */
ldmParams_t ldmParams;
@ -193,8 +238,8 @@ struct ZSTD_CCtx_s {
ZSTD_CCtx_params requestedParams;
ZSTD_CCtx_params appliedParams;
U32 dictID;
void* workSpace;
size_t workSpaceSize;
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
size_t blockSize;
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
unsigned long long consumedSrcSize;
@ -202,6 +247,8 @@ struct ZSTD_CCtx_s {
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
SeqCollector seqCollector;
int isFirstBlock;
seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */
@ -225,7 +272,7 @@ struct ZSTD_CCtx_s {
U32 frameEnded;
/* Dictionary */
ZSTD_CDict* cdictLocal;
ZSTD_localDict localDict;
const ZSTD_CDict* cdict;
ZSTD_prefixDict prefixDict; /* single-usage dictionary */
@ -235,11 +282,15 @@ struct ZSTD_CCtx_s {
#endif
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
typedef size_t (*ZSTD_blockCompressor) (
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@ -273,24 +324,81 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
/*! ZSTD_storeSeq() :
* Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
* `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
* `mlBase` : matchLength - MINMATCH
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
if (ZSTD_isError(bounds.error)) return 0;
if (value < bounds.lowerBound) return 0;
if (value > bounds.upperBound) return 0;
return 1;
}
/* ZSTD_minGain() :
* minimum compression required
* to generate a compress block or a compressed literals section.
* note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
return (srcSize >> minlog) + 2;
}
/*! ZSTD_safecopyLiterals() :
* memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
* large copies.
*/
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
assert(iend > ilimit_w);
if (ip <= ilimit_w) {
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
op += ilimit_w - ip;
ip = ilimit_w;
}
while (ip < iend) *op++ = *ip++;
}
/*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* `mlBase` : matchLength - MINMATCH
* Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
assert(literals + litLength <= litLimit);
if (litEnd <= litLimit_w) {
/* Common case we can use wildcopy.
* First copy 16 bytes, because literals are likely short.
*/
assert(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(seqStorePtr->lit, literals);
if (litLength > 16) {
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
}
} else {
ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
}
seqStorePtr->lit += litLength;
/* literal Length */
@ -302,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */
seqStorePtr->sequences[0].offset = offsetCode + 1;
seqStorePtr->sequences[0].offset = offCode + 1;
/* match Length */
if (mlBase>0xFFFF) {
@ -420,6 +528,11 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
size_t const matchLength = ZSTD_count(ip, match, vEnd);
if (match + matchLength != mEnd) return matchLength;
DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}
@ -464,9 +577,70 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
/** ZSTD_ipow() :
* Return base^exponent.
*/
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
U64 power = 1;
while (exponent) {
if (exponent & 1) power *= base;
exponent >>= 1;
base *= base;
}
return power;
}
#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
/** ZSTD_rollingHash_append() :
* Add the buffer to the hash value.
*/
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
BYTE const* istart = (BYTE const*)buf;
size_t pos;
for (pos = 0; pos < size; ++pos) {
hash *= prime8bytes;
hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
}
return hash;
}
/** ZSTD_rollingHash_compute() :
* Compute the rolling hash value of the buffer.
*/
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
return ZSTD_rollingHash_append(0, buf, size);
}
/** ZSTD_rollingHash_primePower() :
* Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
* over a window of length bytes.
*/
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
return ZSTD_ipow(prime8bytes, length - 1);
}
/** ZSTD_rollingHash_rotate() :
* Rotate the rolling hash by one byte.
*/
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
hash *= prime8bytes;
hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
return hash;
}
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
@ -496,6 +670,20 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
return window.lowLimit < window.dictLimit;
}
/**
* ZSTD_matchState_dictMode():
* Inspects the provided matchState and figures out what dictMode should be
* passed to the compressor.
*/
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
return ZSTD_window_hasExtDict(ms->window) ?
ZSTD_extDict :
ms->dictMatchState != NULL ?
ZSTD_dictMatchState :
ZSTD_noDict;
}
/**
* ZSTD_window_needOverflowCorrection():
* Returns non-zero if the indices are getting too large and need overflow
@ -563,34 +751,99 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* ZSTD_window_enforceMaxDist():
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
*
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd, U32 maxDist,
U32* loadedDictEndPtr)
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const current = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
if (current > maxDist + loadedDictEnd) {
U32 const newLowLimit = current - maxDist;
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
if (window->dictLimit < window->lowLimit) {
DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
window->lowLimit);
DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size.
* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
* loadedDictEnd uses same referential as window->base
* maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
assert(loadedDictEndPtr != NULL);
assert(dictMatchStatePtr != NULL);
{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = *loadedDictEndPtr;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd);
if (blockEndIdx > loadedDictEnd + maxDist) {
/* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block.
*/
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
*dictMatchStatePtr = NULL;
} else {
if (*loadedDictEndPtr != 0) {
DEBUGLOG(6, "dictionary considered valid for current block");
} } }
}
/**
* ZSTD_window_update():
* Updates the window by appending [src, src + srcSize) to the window.
@ -603,12 +856,12 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
DEBUGLOG(5, "ZSTD_window_update");
/* Check if blocks follow each other */
if (src != window->nextSrc) {
/* not contiguous */
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
window->dictLimit);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
window->lowLimit = window->dictLimit;
assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
window->dictLimit = (U32)distanceFromBase;
@ -625,10 +878,55 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
window->lowLimit = lowLimitMax;
DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
}
return contiguous;
}
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}
/* debug functions */
#if (DEBUGLEVEL>=2)
MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
U32 const fp_accuracy = 8;
U32 const fp_multiplier = (1 << fp_accuracy);
U32 const newStat = rawStat + 1;
U32 const hb = ZSTD_highbit32(newStat);
U32 const BWeight = hb * fp_multiplier;
U32 const FWeight = (newStat << fp_accuracy) >> hb;
U32 const weight = BWeight + FWeight;
assert(hb + fp_accuracy < 31);
return (double)weight / fp_multiplier;
}
/* display a table content,
* listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
unsigned u, sum;
for (u=0, sum=0; u<=max; u++) sum += table[u];
DEBUGLOG(2, "total nb elts: %u", sum);
for (u=0; u<=max; u++) {
DEBUGLOG(2, "%2u: %5u (%.2f)",
u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
}
}
#endif
#if defined (__cplusplus)
}
#endif
@ -640,7 +938,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
* ============================================================== */
/* ZSTD_getCParamsFromCCtxParams() :
* cParams are built depending on compressionLevel, src size hints,
* cParams are built depending on compressionLevel, src size hints,
* LDM and manually set compression parameters.
*/
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
@ -654,14 +952,9 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
/*! ZSTD_compressStream_generic() :
* Private use only. To be called from zstdmt_compress.c in single-thread mode. */
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective const flushMode);
void ZSTD_resetSeqStore(seqStore_t* ssPtr);
/*! ZSTD_getCParamsFromCDict() :
* as the name implies */
@ -672,8 +965,9 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
const ZSTD_CCtx_params* params,
unsigned long long pledgedSrcSize);
/* ZSTD_compress_advanced_internal() :
@ -682,13 +976,13 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_CCtx_params params);
const ZSTD_CCtx_params* params);
/* ZSTD_writeLastEmptyBlock() :
* output an empty Block with end-of-frame mark to complete a frame
* @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
* or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
* or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
*/
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);

View file

@ -0,0 +1,154 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_literals.h"
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
memcpy(ostart + flSize, src, srcSize);
return srcSize + flSize;
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
ostart[flSize] = *(const BYTE*)src;
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
disableLiteralCompression);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ?
HUF_compress1X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
HUF_compress4X_repeat(
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
255, 11, entropyWorkspace, entropyWorkspaceSize,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
if (hType == set_compressed) {
/* using a newly constructed table */
nextHuf->repeatMode = HUF_repeat_check;
}
/* Build header */
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
break;
}
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
return lhSize+cLitSize;
}

View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_LITERALS_H
#define ZSTD_COMPRESS_LITERALS_H
#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */

View file

@ -0,0 +1,415 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_sequences.h"
/**
* -log2(x / 256) lookup table for x in [0, 256).
* If x == 0: Return 0
* Else: Return floor(-log2(x / 256) * 256)
*/
static unsigned const kInverseProbabilityLog256[256] = {
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
5, 4, 2, 1,
};
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
void const* ptr = ctable;
U16 const* u16ptr = (U16 const*)ptr;
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
return maxSymbolValue;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
*/
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
size_t const nbSeq, unsigned const FSELog)
{
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
/**
* Returns the cost in bits of encoding the distribution described by count
* using the entropy bound.
*/
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
{
unsigned cost = 0;
unsigned s;
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
norm = 1;
assert(count[s] < total);
cost += count[s] * kInverseProbabilityLog256[norm];
}
return cost >> 8;
}
/**
* Returns the cost in bits of encoding the distribution in count using ctable.
* Returns an error if ctable cannot represent all the symbols in count.
*/
static size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max)
{
unsigned const kAccuracyLog = 8;
size_t cost = 0;
unsigned s;
FSE_CState_t cstate;
FSE_initCState(&cstate, ctable);
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
"Repeat FSE_CTable has maxSymbolValue %u < %u",
ZSTD_getFSEMaxSymbolValue(ctable), max);
for (s = 0; s <= max; ++s) {
unsigned const tableLog = cstate.stateLog;
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
if (count[s] == 0)
continue;
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
"Repeat FSE_CTable has Prob[%u] == 0", s);
cost += count[s] * bitCost;
}
return cost >> kAccuracyLog;
}
/**
* Returns the cost in bits of encoding the distribution in count using the
* table described by norm. The max symbol support by norm is assumed >= max.
* norm must be valid for every symbol with non-zero probability in count.
*/
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
{
unsigned const shift = 8 - accuracyLog;
size_t cost = 0;
unsigned s;
assert(accuracyLog <= 8);
for (s = 0; s <= max; ++s) {
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
unsigned const norm256 = normAcc << shift;
assert(norm256 > 0);
assert(norm256 < 256);
cost += count[s] * kInverseProbabilityLog256[norm256];
}
return cost >> 8;
}
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
DEBUGLOG(5, "Selected set_basic");
return set_basic;
}
DEBUGLOG(5, "Selected set_rle");
return set_rle;
}
if (strategy < ZSTD_lazy) {
if (isDefaultAllowed) {
size_t const staticFse_nbSeq_max = 1000;
size_t const mult = 10 - strategy;
size_t const baseLog = 3;
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
assert(mult <= 9 && mult >= 7);
if ( (*repeatMode == FSE_repeat_valid)
&& (nbSeq < staticFse_nbSeq_max) ) {
DEBUGLOG(5, "Selected set_repeat");
return set_repeat;
}
if ( (nbSeq < dynamicFse_nbSeq_min)
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
DEBUGLOG(5, "Selected set_basic");
/* The format allows default tables to be repeated, but it isn't useful.
* When using simple heuristics to select encoding type, we don't want
* to confuse these tables with dictionaries. When running more careful
* analysis, we don't need to waste time checking both repeating tables
* and default tables.
*/
*repeatMode = FSE_repeat_none;
return set_basic;
}
}
} else {
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
if (isDefaultAllowed) {
assert(!ZSTD_isError(basicCost));
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
}
assert(!ZSTD_isError(NCountCost));
assert(compressedCost < ERROR(maxCode));
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
if (basicCost <= repeatCost && basicCost <= compressedCost) {
DEBUGLOG(5, "Selected set_basic");
assert(isDefaultAllowed);
*repeatMode = FSE_repeat_none;
return set_basic;
}
if (repeatCost <= compressedCost) {
DEBUGLOG(5, "Selected set_repeat");
assert(!ZSTD_isError(repeatCost));
return set_repeat;
}
assert(compressedCost < basicCost && compressedCost < repeatCost);
}
DEBUGLOG(5, "Selected set_compressed");
*repeatMode = FSE_repeat_check;
return set_compressed;
}
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* entropyWorkspace, size_t entropyWorkspaceSize)
{
BYTE* op = (BYTE*)dst;
const BYTE* const oend = op + dstCapacity;
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
switch (type) {
case set_rle:
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
*op = codeTable[0];
return 1;
case set_repeat:
memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
count[codeTable[nbSeq-1]]--;
nbSeq_1--;
}
assert(nbSeq_1 > 1);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
return NCountSize;
}
}
default: assert(0); RETURN_ERROR(GENERIC);
}
}
FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
FSE_CState_t stateOffsetBits;
FSE_CState_t stateLitLength;
RETURN_ERROR_IF(
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
dstSize_tooSmall, "not enough space remaining");
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
(int)(blockStream.endPtr - blockStream.startPtr),
(unsigned)dstCapacity);
/* first symbols */
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);
{ size_t n;
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
BYTE const llCode = llCodeTable[n];
BYTE const ofCode = ofCodeTable[n];
BYTE const mlCode = mlCodeTable[n];
U32 const llBits = LL_bits[llCode];
U32 const ofBits = ofCode;
U32 const mlBits = ML_bits[mlCode];
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
(unsigned)sequences[n].litLength,
(unsigned)sequences[n].matchLength + MINMATCH,
(unsigned)sequences[n].offset);
/* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
} }
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
FSE_flushCState(&blockStream, &stateMatchLength);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
FSE_flushCState(&blockStream, &stateOffsetBits);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
FSE_flushCState(&blockStream, &stateLitLength);
{ size_t const streamSize = BIT_closeCStream(&blockStream);
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
return streamSize;
}
}
static size_t
ZSTD_encodeSequences_default(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
if (bmi2) {
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
(void)bmi2;
return ZSTD_encodeSequences_default(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}

View file

@ -0,0 +1,47 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "fse.h" /* FSE_repeat, FSE_CTable */
#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* entropyWorkspace, size_t entropyWorkspaceSize);
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
#endif /* ZSTD_COMPRESS_SEQUENCES_H */

View file

@ -0,0 +1,535 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_CWKSP_H
#define ZSTD_CWKSP_H
/*-*************************************
* Dependencies
***************************************/
#include "zstd_internal.h"
#if defined (__cplusplus)
extern "C" {
#endif
/*-*************************************
* Constants
***************************************/
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
/* when workspace is continuously too large
* during at least this number of times,
* context's memory usage is considered wasteful,
* because it's sized to handle a worst case scenario which rarely happens.
* In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
/* Since the workspace is effectively its own little malloc implementation /
* arena, when we run under ASAN, we should similarly insert redzones between
* each internal element of the workspace, so ASAN will catch overruns that
* reach outside an object but that stay inside the workspace.
*
* This defines the size of that redzone.
*/
#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
#endif
/*-*************************************
* Structures
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers,
ZSTD_cwksp_alloc_aligned
} ZSTD_cwksp_alloc_phase_e;
/**
* Zstd fits all its internal datastructures into a single continuous buffer,
* so that it only needs to perform a single OS allocation (or so that a buffer
* can be provided to it and it can perform no allocations at all). This buffer
* is called the workspace.
*
* Several optimizations complicate that process of allocating memory ranges
* from this workspace for each internal datastructure:
*
* - These different internal datastructures have different setup requirements:
*
* - The static objects need to be cleared once and can then be trivially
* reused for each compression.
*
* - Various buffers don't need to be initialized at all--they are always
* written into before they're read.
*
* - The matchstate tables have a unique requirement that they don't need
* their memory to be totally cleared, but they do need the memory to have
* some bound, i.e., a guarantee that all values in the memory they've been
* allocated is less than some maximum value (which is the starting value
* for the indices that they will then use for compression). When this
* guarantee is provided to them, they can use the memory without any setup
* work. When it can't, they have to clear the area.
*
* - These buffers also have different alignment requirements.
*
* - We would like to reuse the objects in the workspace for multiple
* compressions without having to perform any expensive reallocation or
* reinitialization work.
*
* - We would like to be able to efficiently reuse the workspace across
* multiple compressions **even when the compression parameters change** and
* we need to resize some of the objects (where possible).
*
* To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
* abstraction was created. It works as follows:
*
* Workspace Layout:
*
* [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
*
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
* so that literally everything fits in a single buffer. Note: if present,
* this must be the first object in the workspace, since ZSTD_free{CCtx,
* CDict}() rely on a pointer comparison to see whether one or two frees are
* required.
*
* - Fixed size objects: these are fixed-size, fixed-count objects that are
* nonetheless "dynamically" allocated in the workspace so that we can
* control how they're initialized separately from the broader ZSTD_CCtx.
* Examples:
* - Entropy Workspace
* - 2 x ZSTD_compressedBlockState_t
* - CDict dictionary contents
*
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
* be moved around at no cost for a new compression.
*
* Allocating Memory:
*
* The various types of objects must be allocated in order, so they can be
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
typedef struct {
void* workspace;
void* workspaceEnd;
void* objectEnd;
void* tableEnd;
void* tableValidEnd;
void* allocStart;
int allocFailed;
int workspaceOversizedDuration;
ZSTD_cwksp_alloc_phase_e phase;
} ZSTD_cwksp;
/*-*************************************
* Functions
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
assert(ws->workspace <= ws->objectEnd);
assert(ws->objectEnd <= ws->tableEnd);
assert(ws->objectEnd <= ws->tableValidEnd);
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
}
/**
* Align must be a power of 2.
*/
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
size_t const mask = align - 1;
assert((align & mask) == 0);
return (size + mask) & ~mask;
}
/**
* Use this to determine how much space in the workspace we will consume to
* allocate this object. (Normally it should be exactly the size of the object,
* but under special conditions, like ASAN, where we pad each object, it might
* be larger.)
*
* Since tables aren't currently redzoned, you don't need to call through this
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
return size;
#endif
}
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
assert(phase >= ws->phase);
if (phase > ws->phase) {
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
phase >= ZSTD_cwksp_alloc_buffers) {
ws->tableValidEnd = ws->objectEnd;
}
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
/* If unaligned allocations down from a too-large top have left us
* unaligned, we need to realign our alloc ptr. Technically, this
* can consume space that is unaccounted for in the neededSpace
* calculation. However, I believe this can only happen when the
* workspace is too large, and specifically when it is too large
* by a larger margin than the space that will be consumed. */
/* TODO: cleaner, compiler warning friendly way to do this??? */
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
if (ws->allocStart < ws->tableValidEnd) {
ws->tableValidEnd = ws->allocStart;
}
}
ws->phase = phase;
}
}
/**
* Returns whether this object/buffer/etc was allocated in this workspace.
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
}
/**
* Internal function. Do not use directly.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
void* alloc;
void* bottom = ws->tableEnd;
ZSTD_cwksp_internal_advance_phase(ws, phase);
alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(alloc >= bottom);
if (alloc < bottom) {
DEBUGLOG(4, "cwksp: alloc failed!");
ws->allocFailed = 1;
return NULL;
}
if (alloc < ws->tableValidEnd) {
ws->tableValidEnd = alloc;
}
ws->allocStart = alloc;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Reserves and returns unaligned memory.
*/
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
}
/**
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
assert((bytes & (sizeof(U32)-1)) == 0);
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
}
/**
* Aligned on sizeof(unsigned). These buffers have the special property that
* their values remain constrained, allowing us to re-use them without
* memset()-ing them.
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
void* alloc = ws->tableEnd;
void* end = (BYTE *)alloc + bytes;
void* top = ws->allocStart;
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
assert((bytes & (sizeof(U32)-1)) == 0);
ZSTD_cwksp_internal_advance_phase(ws, phase);
ZSTD_cwksp_assert_internal_consistency(ws);
assert(end <= top);
if (end > top) {
DEBUGLOG(4, "cwksp: table alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
/**
* Aligned on sizeof(void*).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* over-reserve space */
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
DEBUGLOG(5,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
assert((bytes & (sizeof(void*)-1)) == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
DEBUGLOG(4, "cwksp: object alloc failed!");
ws->allocFailed = 1;
return NULL;
}
ws->objectEnd = end;
ws->tableEnd = end;
ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
* either size. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
__asan_unpoison_memory_region(alloc, bytes);
#endif
return alloc;
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
* space every time we mark it dirty. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
__msan_poison(ws->objectEnd, size);
}
#endif
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
ws->tableValidEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Zero the part of the allocated tables not already marked clean.
*/
MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) {
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
}
ZSTD_cwksp_mark_tables_clean(ws);
}
/**
* Invalidates table allocations.
* All other allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* Invalidates all buffer, aligned, and table allocations.
* Object allocations remain valid.
*/
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
* the workspace (or at least the non-static, non-table parts of it)
* every time we start a new compression. */
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
__msan_poison(ws->tableValidEnd, size);
}
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
{
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
__asan_poison_memory_region(ws->objectEnd, size);
}
#endif
ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd;
ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) {
ws->phase = ZSTD_cwksp_alloc_buffers;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
* The provided workspace takes ownership of the buffer [start, start+size).
* Any existing values in the workspace are ignored (the previously managed
* buffer, if present, must be separately freed).
*/
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
ws->workspace = start;
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
ws->phase = ZSTD_cwksp_alloc_objects;
ZSTD_cwksp_clear(ws);
ws->workspaceOversizedDuration = 0;
ZSTD_cwksp_assert_internal_consistency(ws);
}
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
void* workspace = ZSTD_malloc(size, customMem);
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
RETURN_ERROR_IF(workspace == NULL, memory_allocation);
ZSTD_cwksp_init(ws, workspace, size);
return 0;
}
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
void *ptr = ws->workspace;
DEBUGLOG(4, "cwksp: freeing workspace");
memset(ws, 0, sizeof(ZSTD_cwksp));
ZSTD_free(ptr, customMem);
}
/**
* Moves the management of a workspace from one cwksp to another. The src cwksp
* is left in an invalid state (src must be re-init()'ed before its used again).
*/
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
*dst = *src;
memset(src, 0, sizeof(ZSTD_cwksp));
}
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
return ws->allocFailed;
}
/*-*************************************
* Functions Checking Free Space
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}
MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
}
MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_available(
ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
}
MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
&& ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
}
MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
ZSTD_cwksp* ws, size_t additionalNeededSpace) {
if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
ws->workspaceOversizedDuration++;
} else {
ws->workspaceOversizedDuration = 0;
}
}
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_CWKSP_H */

View file

@ -13,12 +13,12 @@
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
ZSTD_compressionParameters const* cParams,
void const* end)
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
U32 const hBitsL = cParams->hashLog;
U32 const mls = cParams->searchLength;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog;
const BYTE* const base = ms->window.base;
@ -40,17 +40,20 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
hashSmall[smHash] = current + i;
if (i == 0 || hashLarge[lgHash] == 0)
hashLarge[lgHash] = current + i;
}
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
U32 const mls /* template */)
void const* src, size_t srcSize,
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;
const U32 hBitsL = cParams->hashLog;
U32* const hashSmall = ms->chainTable;
@ -59,236 +62,424 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
/* presumes that, if there is a dictionary, it must be using Attach mode */
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams =
dictMode == ZSTD_dictMatchState ?
&dms->cParams : NULL;
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
dms->hashTable : NULL;
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
dms->chainTable : NULL;
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
dictBase + dictStartIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
dictCParams->hashLog : hBitsL;
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}
/* init */
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
}
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2];
U32 const matchIndexS = hashSmall[h];
U32 matchIndexS = hashSmall[h];
const BYTE* matchLong = base + matchIndexL;
const BYTE* match = base + matchIndexS;
const U32 repIndex = current + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashLong[h2] = hashSmall[h] = current; /* update hash tables */
assert(offset_1 <= current); /* supposed guaranteed by construction */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
/* favor repcode */
/* check dictMatchState repcode */
if (dictMode == ZSTD_dictMatchState
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
/* check noDict repcode */
if ( dictMode == ZSTD_noDict
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
U32 offset;
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
goto _match_stored;
}
if (matchIndexL > prefixLowestIndex) {
/* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) {
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
offset = (U32)(ip-matchLong);
while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
} else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = current + 1;
if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found;
}
} else if (dictMode == ZSTD_dictMatchState) {
/* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL];
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd);
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
} else if (dictMode == ZSTD_dictMatchState) {
/* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS];
match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta;
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
} }
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
_search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = current + 1;
/* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) {
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
ip++;
offset = (U32)(ip-matchL3);
while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
offset = (U32)(ip-match);
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found;
}
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
}
} else if (dictMode == ZSTD_dictMatchState) {
/* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
ip++;
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
} } }
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
offset = (U32)(current - matchIndexS);
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
} else {
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
offset = (U32)(ip - match);
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
/* fall-through */
_match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
if (dictMode == ZSTD_dictMatchState) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
&& repIndex2 < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex2 :
base + repIndex2;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
} }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
const U32 mls = cParams->searchLength;
const U32 mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
}
}
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
const U32 mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
}
}
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
void const* src, size_t srcSize,
U32 const mls /* template */)
{
ZSTD_compressionParameters const* cParams = &ms->cParams;
U32* const hashLong = ms->hashTable;
U32 const hBitsL = cParams->hashLog;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
U32 offset_1=rep[0], offset_2=rep[1];
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
const U32 matchIndex = hashSmall[hSmall];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
const U32 matchLongIndex = hashLong[hLong];
const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
const BYTE* matchLong = matchLongBase + matchLongIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* repMatch = repBase + repIndex;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
size_t mLength;
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
& (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset;
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
offset = current - matchLongIndex;
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
U32 const matchIndex3 = hashLong[h3];
const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
const BYTE* match3 = match3Base + matchIndex3;
U32 offset;
hashLong[h3] = current + 1;
if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
ip++;
offset = current+1 - matchIndex3;
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
} else {
const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
offset = current - matchIndex;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
/* found a match : store it */
/* move to next sequence start */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
& (repIndex2 > dictStartIndex))
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@ -303,25 +494,25 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
U32 const mls = cParams->searchLength;
U32 const mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
}
}

View file

@ -19,14 +19,16 @@ extern "C" {
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
ZSTD_compressionParameters const* cParams,
void const* end);
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
#if defined (__cplusplus)

View file

@ -8,17 +8,18 @@
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_compress_internal.h"
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
ZSTD_compressionParameters const* cParams,
void const* end)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hBits = cParams->hashLog;
U32 const mls = cParams->searchLength;
U32 const mls = cParams->minMatch;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
@ -27,42 +28,221 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
/* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
U32 const current = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
if (i == 0 || hashTable[hash] == 0)
hashTable[hash] = current + i;
}
}
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
hashTable[hash0] = current;
if (dtlm == ZSTD_dtlm_fast) continue;
/* Only load extra positions for ZSTD_dtlm_full */
{ U32 p;
for (p = 1; p < fastHashFillStep; ++p) {
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hash] == 0) { /* not yet filled */
hashTable[hash] = current + p;
} } } }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const hlog, U32 const stepSize, U32 const mls)
U32 const mls)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
/* init */
ip += (ip==lowest);
{ U32 const maxRep = (U32)(ip-lowest);
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{ U32 const maxRep = (U32)(ip0 - prefixStart);
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
}
/* Main Search Loop */
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
size_t mLength;
BYTE const* ip2 = ip0 + 2;
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
U32 const val0 = MEM_read32(ip0);
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
U32 const val1 = MEM_read32(ip1);
U32 const current0 = (U32)(ip0-base);
U32 const current1 = (U32)(ip1-base);
U32 const matchIndex0 = hashTable[h0];
U32 const matchIndex1 = hashTable[h1];
BYTE const* repMatch = ip2-offset_1;
const BYTE* match0 = base + matchIndex0;
const BYTE* match1 = base + matchIndex1;
U32 offcode;
hashTable[h0] = current0; /* update hash table */
hashTable[h1] = current1; /* update hash table */
assert(ip0 + 1 == ip1);
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
ip0 = ip2 - mLength;
match0 = repMatch - mLength;
offcode = 0;
goto _match;
}
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
/* found a regular match */
goto _offset;
}
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
/* found a regular match after one literal */
ip0 = ip1;
match0 = match1;
goto _offset;
}
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
assert(step >= 2);
ip0 += step;
ip1 += step;
continue;
}
_offset: /* Requires: ip0, match0 */
/* Compute the offset code */
offset_2 = offset_1;
offset_1 = (U32)(ip0-match0);
offcode = offset_1 + ZSTD_REP_MOVE;
mLength = 0;
/* Count the backwards match length */
while (((ip0>anchor) & (match0>prefixStart))
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
_match: /* Requires: ip0, match0, offcode */
/* Count the forward length */
mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
/* match found */
ip0 += mLength;
anchor = ip0;
ip1 = ip0 + 1;
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
&& (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
/* store sequence */
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength;
ip1 = ip0 + 1;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */
}
}
}
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
}
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize, U32 const mls)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
const U32* const dictHashTable = dms->hashTable;
const U32 dictStartIndex = dms->window.dictLimit;
const BYTE* const dictBase = dms->window.base;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
@ -70,26 +250,54 @@ size_t ZSTD_compressBlock_fast_generic(
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = current + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = current; /* update hash table */
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex <= lowestIndex)
|| (MEM_read32(match) != MEM_read32(ip)) ) {
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictStartIndex ||
MEM_read32(dictMatch) != MEM_read32(ip)) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
{ U32 const offset = (U32)(ip-match);
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
} else {
/* found a dict match */
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
while (((ip>anchor) & (dictMatch>dictStart))
&& (ip[-1] == dictMatch[-1])) {
ip--; dictMatch--; mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
} else {
/* found a regular match */
U32 const offset = (U32)(ip-match);
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
while (((ip>anchor) & (match>prefixStart))
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
ip += mLength;
@ -97,114 +305,133 @@ size_t ZSTD_compressBlock_fast_generic(
if (ip <= ilimit) {
/* Fill Table */
assert(base+current+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
dictBase - dictIndexDelta + repIndex2 :
base + repIndex2;
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
continue;
}
break;
}
}
}
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast(
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
U32 const hlog = cParams->hashLog;
U32 const mls = cParams->searchLength;
U32 const stepSize = cParams->targetLength;
U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState != NULL);
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
}
}
static size_t ZSTD_compressBlock_fast_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize,
U32 const hlog, U32 const stepSize, U32 const mls)
void const* src, size_t srcSize, U32 const mls)
{
U32* hashTable = ms->hashTable;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
const U32 matchIndex = hashTable[h];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
const BYTE* repMatch = repBase + repIndex;
size_t mLength;
const U32 matchIndex = hashTable[h];
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
const U32 current = (U32)(ip-base);
const U32 repIndex = current + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = current; /* update hash table */
assert(offset_1 <= current +1); /* check repIndex */
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
} else {
if ( (matchIndex < lowestIndex) ||
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
assert(stepSize >= 1);
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
U32 offset;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 const offset = current - matchIndex;
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
offset_2 = offset_1; offset_1 = offset; /* update offset history */
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ip += mLength;
anchor = ip;
} }
/* found a match : store it */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@ -213,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
@ -233,27 +460,25 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
U32 const hlog = cParams->hashLog;
U32 const mls = cParams->searchLength;
U32 const stepSize = cParams->targetLength;
U32 const mls = ms->cParams.minMatch;
switch(mls)
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
case 5 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
case 6 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
case 7 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
}
}

View file

@ -19,14 +19,16 @@ extern "C" {
#include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
ZSTD_compressionParameters const* cParams,
void const* end);
void const* end, ZSTD_dictTableLoadMethod_e dtlm);
size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View file

@ -16,11 +16,12 @@
* Binary Tree search
***************************************/
void ZSTD_updateDUBT(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
static void
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iend,
U32 mls)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hashLog = cParams->hashLog;
@ -59,14 +60,16 @@ void ZSTD_updateDUBT(
* sort one already inserted but unsorted position
* assumption : current >= btlow == (current - btmask)
* doesn't fail */
static void ZSTD_insertDUBT1(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32 current, const BYTE* inputEnd,
U32 nbCompares, U32 btLow, int extDict)
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
{
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
@ -78,9 +81,12 @@ static void ZSTD_insertDUBT1(
const BYTE* match;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
@ -91,11 +97,16 @@ static void ZSTD_insertDUBT1(
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
/* note : all candidates are now supposed sorted,
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
if ( (!extDict)
if ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|| (current < dictLimit) /* both in extDict */) {
const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit)) ?
base : dictBase;
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|| (current < dictLimit) );
match = mBase + matchIndex;
@ -104,7 +115,7 @@ static void ZSTD_insertDUBT1(
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
match = base + matchIndex; /* preparation for next read of match[matchLength] */
}
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
@ -138,13 +149,92 @@ static void ZSTD_insertDUBT1(
}
static size_t ZSTD_DUBT_findBestMatch (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
U32 const mls,
U32 const extDict)
static size_t
ZSTD_DUBT_findBetterDictMatch (
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
size_t bestLength,
U32 nbCompares,
U32 const mls,
const ZSTD_dictMode_e dictMode)
{
const ZSTD_matchState_t * const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
const U32 * const dictHashTable = dms->hashTable;
U32 const hashLog = dmsCParams->hashLog;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32 dictMatchIndex = dictHashTable[h];
const BYTE* const base = ms->window.base;
const BYTE* const prefixStart = base + ms->window.dictLimit;
U32 const current = (U32)(ip-base);
const BYTE* const dictBase = dms->window.base;
const BYTE* const dictEnd = dms->window.nextSrc;
U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
U32 const dictLowLimit = dms->window.lowLimit;
U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
U32* const dictBt = dms->chainTable;
U32 const btLog = dmsCParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
size_t commonLengthSmaller=0, commonLengthLarger=0;
(void)dictMode;
assert(dictMode == ZSTD_dictMatchState);
while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match = dictBase + dictMatchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (dictMatchIndex+matchLength >= dictHighLimit)
match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
if (matchLength > bestLength) {
U32 matchIndex = dictMatchIndex + dictIndexDelta;
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
}
if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
if (match[matchLength] < ip[matchLength]) {
if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else {
/* match is larger than current */
if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
commonLengthLarger = matchLength;
dictMatchIndex = nextPtr[0];
}
}
if (bestLength >= MINMATCH) {
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
}
return bestLength;
}
static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
size_t* offsetPtr,
U32 const mls,
const ZSTD_dictMode_e dictMode)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hashLog = cParams->hashLog;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
@ -152,7 +242,7 @@ static size_t ZSTD_DUBT_findBestMatch (
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@ -175,7 +265,7 @@ static size_t ZSTD_DUBT_findBestMatch (
&& (nbCandidates > 1) ) {
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
matchIndex);
*unsortedMark = previousCandidate;
*unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
previousCandidate = matchIndex;
matchIndex = *nextCandidate;
nextCandidate = bt + 2*(matchIndex&btMask);
@ -183,11 +273,13 @@ static size_t ZSTD_DUBT_findBestMatch (
nbCandidates --;
}
/* nullify last candidate if it's still unsorted
* simplification, detrimental to compression ratio, beneficial for speed */
if ( (matchIndex > unsortLimit)
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
matchIndex);
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
*nextCandidate = *unsortedMark = 0;
}
/* batch sort stacked candidates */
@ -195,21 +287,21 @@ static size_t ZSTD_DUBT_findBestMatch (
while (matchIndex) { /* will end on matchIndex == 0 */
U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
U32 const nextCandidateIdx = *nextCandidateIdxPtr;
ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
nbCandidates, unsortLimit, extDict);
ZSTD_insertDUBT1(ms, matchIndex, iend,
nbCandidates, unsortLimit, dictMode);
matchIndex = nextCandidateIdx;
nbCandidates++;
}
/* find longest match */
{ size_t commonLengthSmaller=0, commonLengthLarger=0;
{ size_t commonLengthSmaller = 0, commonLengthLarger = 0;
const BYTE* const dictBase = ms->window.dictBase;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1;
U32 matchEndIdx = current + 8 + 1;
U32 dummy32; /* to be nullified at the end */
size_t bestLength = 0;
@ -221,7 +313,7 @@ static size_t ZSTD_DUBT_findBestMatch (
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match;
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
@ -237,6 +329,11 @@ static size_t ZSTD_DUBT_findBestMatch (
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
if (dictMode == ZSTD_dictMatchState) {
nbCompares = 0; /* in addition to avoiding checking any
* further in this loop, make sure we
* skip checking in the dictionary. */
}
break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
@ -259,6 +356,13 @@ static size_t ZSTD_DUBT_findBestMatch (
*smallerPtr = *largerPtr = 0;
if (dictMode == ZSTD_dictMatchState && nbCompares) {
bestLength = ZSTD_DUBT_findBetterDictMatch(
ms, ip, iend,
offsetPtr, bestLength, nbCompares,
mls, dictMode);
}
assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
if (bestLength >= MINMATCH) {
@ -272,61 +376,64 @@ static size_t ZSTD_DUBT_findBestMatch (
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls /* template */)
FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls /* template */,
const ZSTD_dictMode_e dictMode)
{
DEBUGLOG(7, "ZSTD_BtFindBestMatch");
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
ZSTD_updateDUBT(ms, ip, iLimit, mls);
return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
}
static size_t ZSTD_BtFindBestMatch_selectMLS (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
static size_t
ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(cParams->searchLength)
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
case 7 :
case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
}
}
/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls)
{
DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
}
static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(cParams->searchLength)
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
case 7 :
case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
}
}
static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
case 7 :
case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
}
}
@ -335,12 +442,13 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
/* *********************************
* Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
const BYTE* ip, U32 const mls)
{
U32* const hashTable = ms->hashTable;
@ -362,22 +470,21 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}
U32 ZSTD_insertAndFindFirstIndex(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip)
{
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
const ZSTD_compressionParameters* const cParams = &ms->cParams;
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 mls, const U32 extDict)
const U32 mls, const ZSTD_dictMode_e dictMode)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const chainTable = ms->chainTable;
const U32 chainSize = (1 << cParams->chainLog);
const U32 chainMask = chainSize-1;
@ -386,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
const U32 isDictionary = (ms->loadedDictEnd != 0);
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
@ -397,8 +508,9 @@ size_t ZSTD_HcFindBestMatch_generic (
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
if ((!extDict) || matchIndex >= dictLimit) {
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
const BYTE* const match = base + matchIndex;
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
if (match[ml] == ip[ml]) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else {
@ -419,38 +531,87 @@ size_t ZSTD_HcFindBestMatch_generic (
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
if (dictMode == ZSTD_dictMatchState) {
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32* const dmsChainTable = dms->chainTable;
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
const U32 dmsChainMask = dmsChainSize - 1;
const U32 dmsLowestIndex = dms->window.dictLimit;
const BYTE* const dmsBase = dms->window.base;
const BYTE* const dmsEnd = dms->window.nextSrc;
const U32 dmsSize = (U32)(dmsEnd - dmsBase);
const U32 dmsIndexDelta = dictLimit - dmsSize;
const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
const BYTE* const match = dmsBase + matchIndex;
assert(match+4 <= dmsEnd);
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
if (matchIndex <= dmsMinChain) break;
matchIndex = dmsChainTable[matchIndex & dmsChainMask];
}
}
return ml;
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(cParams->searchLength)
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
}
}
static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr)
{
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
}
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
size_t* const offsetPtr)
size_t* offsetPtr)
{
switch(cParams->searchLength)
switch(ms->cParams.minMatch)
{
default : /* includes case 3 */
case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
case 7 :
case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
}
}
@ -458,34 +619,62 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
/* *******************************
* Common parser - lazy strategy
*********************************/
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_generic(
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams,
const void* src, size_t srcSize,
const U32 searchMethod, const U32 depth)
const searchMethod_e searchMethod, const U32 depth,
ZSTD_dictMode_e const dictMode)
{
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* const base = ms->window.base + ms->window.dictLimit;
const BYTE* const base = ms->window.base;
const U32 prefixLowestIndex = ms->window.dictLimit;
const BYTE* const prefixLowest = base + prefixLowestIndex;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
: ZSTD_HcFindBestMatch_selectMLS);
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
dms->window.base : NULL;
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
/* init */
ip += (ip==base);
ms->nextToUpdate3 = ms->nextToUpdate;
{ U32 const maxRep = (U32)(ip-base);
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength);
}
/* Match Loop */
while (ip < ilimit) {
@ -494,15 +683,28 @@ size_t ZSTD_compressBlock_lazy_generic(
const BYTE* start=ip+1;
/* check repCode */
if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
/* repcode : we take it */
if (dictMode == ZSTD_dictMatchState) {
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
if (depth==0) goto _storeSequence;
}
}
if ( dictMode == ZSTD_noDict
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
if (depth==0) goto _storeSequence;
}
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
{ size_t offsetFound = 999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
@ -516,15 +718,31 @@ size_t ZSTD_compressBlock_lazy_generic(
if (depth>=1)
while (ip<ilimit) {
ip ++;
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
if ( (dictMode == ZSTD_noDict)
&& (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
int const gain2 = (int)(mlRep * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
if (dictMode == ZSTD_dictMatchState) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
int const gain2 = (int)(mlRep * 3);
int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
}
{ size_t offset2=999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@ -535,15 +753,31 @@ size_t ZSTD_compressBlock_lazy_generic(
/* let's find an even better one */
if ((depth==2) && (ip<ilimit)) {
ip ++;
if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
int const gain2 = (int)(ml2 * 4);
if ( (dictMode == ZSTD_noDict)
&& (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
int const gain2 = (int)(mlRep * 4);
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
if ((ml2 >= 4) && (gain2 > gain1))
matchLength = ml2, offset = 0, start = ip;
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
if (dictMode == ZSTD_dictMatchState) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
int const gain2 = (int)(mlRep * 4);
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
}
{ size_t offset2=999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@ -560,65 +794,124 @@ size_t ZSTD_compressBlock_lazy_generic(
*/
/* catch up */
if (offset) {
while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
if (dictMode == ZSTD_noDict) {
while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
}
if (dictMode == ZSTD_dictMatchState) {
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
}
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
}
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
/* check immediate repcode */
while ( ((ip <= ilimit) & (offset_2>0))
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
} }
if (dictMode == ZSTD_dictMatchState) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex = current2 - offset_2;
const BYTE* repMatch = dictMode == ZSTD_dictMatchState
&& repIndex < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex :
base + repIndex;
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue;
}
break;
}
}
if (dictMode == ZSTD_noDict) {
while ( ((ip <= ilimit) & (offset_2>0))
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* Save reps for next block */
rep[0] = offset_1 ? offset_1 : savedOffset;
rep[1] = offset_2 ? offset_2 : savedOffset;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
@ -626,9 +919,8 @@ FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams,
const void* src, size_t srcSize,
const U32 searchMethod, const U32 depth)
const searchMethod_e searchMethod, const U32 depth)
{
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
@ -644,14 +936,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
const BYTE* const dictStart = dictBase + lowestIndex;
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */
@ -674,8 +965,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
} }
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
{ size_t offsetFound = 999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
@ -707,8 +998,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
} }
/* search match, depth 1 */
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
{ size_t offset2=999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@ -737,8 +1028,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
} }
/* search match, depth 2 */
{ size_t offset2=99999999;
size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
{ size_t offset2=999999999;
size_t const ml2 = searchMax(ms, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@ -760,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@ -775,7 +1066,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@ -788,37 +1079,37 @@ _storeSequence:
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}

View file

@ -17,37 +17,48 @@ extern "C" {
#include "zstd_compress_internal.h"
U32 ZSTD_insertAndFindFirstIndex(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip);
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View file

@ -9,6 +9,7 @@
#include "zstd_ldm.h"
#include "debug.h"
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
@ -20,7 +21,7 @@
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
ZSTD_compressionParameters const* cParams)
{
U32 const windowLog = cParams->windowLog;
params->windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
@ -33,12 +34,13 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
params->minMatchLength = minMatch;
}
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
}
if (params->hashEveryLog == 0) {
params->hashEveryLog =
windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
if (params->hashRateLog == 0) {
params->hashRateLog = params->windowLog < params->hashLog
? 0
: params->windowLog - params->hashLog;
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
@ -47,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
size_t const ldmHSize = ((size_t)1) << params.hashLog;
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
size_t const ldmBucketSize =
((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
return params.enableLdm ? totalSize : 0;
}
@ -117,20 +119,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
*
* Gets the small hash, checksum, and tag from the rollingHash.
*
* If the tag matches (1 << ldmParams.hashEveryLog)-1, then
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
* creates an ldmEntry from the offset, and inserts it into the hash table.
*
* hBits is the length of the small hash, which is the most significant hBits
* of rollingHash. The checksum is the next 32 most significant bits, followed
* by ldmParams.hashEveryLog bits that make up the tag. */
* by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
U64 const rollingHash,
U32 const hBits,
U32 const offset,
ldmParams_t const ldmParams)
{
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
if (tag == tagMask) {
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
@ -141,56 +143,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
}
}
/** ZSTD_ldm_getRollingHash() :
* Get a 64-bit hash using the first len bytes from buf.
*
* Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
* H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
*
* where the constant a is defined to be prime8bytes.
*
* The implementation adds an offset to each byte, so
* H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
{
U64 ret = 0;
U32 i;
for (i = 0; i < len; i++) {
ret *= prime8bytes;
ret += buf[i] + LDM_HASH_CHAR_OFFSET;
}
return ret;
}
/** ZSTD_ldm_ipow() :
* Return base^exp. */
static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
{
U64 ret = 1;
while (exp) {
if (exp & 1) { ret *= base; }
exp >>= 1;
base *= base;
}
return ret;
}
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
}
/** ZSTD_ldm_updateHash() :
* Updates hash by removing toRemove and adding toAdd. */
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
{
hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
hash *= prime8bytes;
hash += toAdd + LDM_HASH_CHAR_OFFSET;
return hash;
}
/** ZSTD_ldm_countBackwardsMatch() :
* Returns the number of bytes that match backwards before pIn and pMatch.
*
@ -216,21 +168,18 @@ static size_t ZSTD_ldm_countBackwardsMatch(
* The tables for the other strategies are filled within their
* block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
ZSTD_compressionParameters const* cParams,
void const* end)
{
const BYTE* const iend = (const BYTE*)end;
switch(cParams->strategy)
switch(ms->cParams.strategy)
{
case ZSTD_fast:
ZSTD_fillHashTable(ms, cParams, iend);
ms->nextToUpdate = (U32)(iend - ms->window.base);
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
break;
case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, cParams, iend);
ms->nextToUpdate = (U32)(iend - ms->window.base);
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
break;
case ZSTD_greedy:
@ -239,6 +188,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
case ZSTD_btlazy2:
case ZSTD_btopt:
case ZSTD_btultra:
case ZSTD_btultra2:
break;
default:
assert(0); /* not possible : not a valid strategy id */
@ -262,9 +212,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
const BYTE* cur = lastHashed + 1;
while (cur < iend) {
rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
cur[ldmParams.minMatchLength-1],
state->hashPower);
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
cur[ldmParams.minMatchLength-1],
state->hashPower);
ZSTD_ldm_makeEntryAndInsertByTag(state,
rollingHash, hBits,
(U32)(cur - base), ldmParams);
@ -298,8 +248,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
U64 const hashPower = ldmState->hashPower;
U32 const hBits = params->hashLog - params->bucketSizeLog;
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
U32 const hashEveryLog = params->hashEveryLog;
U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
U32 const hashRateLog = params->hashRateLog;
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
/* Prefix and extDict parameters */
U32 const dictLimit = ldmState->window.dictLimit;
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@ -325,16 +275,16 @@ static size_t ZSTD_ldm_generateSequences_internal(
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
lastHashed[minMatchLength],
hashPower);
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
lastHashed[minMatchLength],
hashPower);
} else {
rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
}
lastHashed = ip;
/* Do not insert and do not look for a match */
if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
ip++;
continue;
}
@ -479,7 +429,7 @@ size_t ZSTD_ldm_generateSequences(
*/
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
/* The input could be very large (in zstdmt), so it must be broken up into
* chunks to enforce the maximmum distance and handle overflow correction.
* chunks to enforce the maximum distance and handle overflow correction.
*/
assert(sequences->pos <= sequences->size);
assert(sequences->size <= sequences->capacity);
@ -497,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
/* 2. We enforce the maximum offset allowed.
@ -508,7 +458,7 @@ size_t ZSTD_ldm_generateSequences(
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
*/
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
ldmState, sequences, params, chunkStart, chunkSize);
@ -591,19 +541,19 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict)
void const* src, size_t srcSize)
{
unsigned const minMatch = cParams->searchLength;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
unsigned const minMatch = cParams->minMatch;
ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, extDict);
BYTE const* const base = ms->window.base;
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
/* Input bounds */
BYTE const* const istart = (BYTE const*)src;
BYTE const* const iend = istart + srcSize;
/* Input positions */
BYTE const* ip = istart;
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
@ -621,20 +571,19 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
/* Fill tables for block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, cParams, ip);
ZSTD_ldm_fillFastTables(ms, ip);
/* Run the block compressor */
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
{
size_t const newLitLength =
blockCompressor(ms, seqStore, rep, cParams, ip,
sequence.litLength);
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
ip += sequence.litLength;
ms->nextToUpdate = (U32)(ip - base);
/* Update the repcodes */
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
rep[i] = rep[i-1];
rep[0] = sequence.offset;
/* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
sequence.offset + ZSTD_REP_MOVE,
sequence.matchLength - MINMATCH);
ip += sequence.matchLength;
@ -642,12 +591,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
}
/* Fill the tables for the block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, cParams, ip);
ZSTD_ldm_fillFastTables(ms, ip);
/* Compress the last literals */
{
size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
ip, iend - ip);
ms->nextToUpdate = (U32)(iend - base);
return lastLiterals;
}
return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}

View file

@ -21,7 +21,7 @@ extern "C" {
* Long distance matching
***************************************/
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
/**
* ZSTD_ldm_generateSequences():
@ -61,9 +61,7 @@ size_t ZSTD_ldm_generateSequences(
*/
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams,
void const* src, size_t srcSize,
int const extDict);
void const* src, size_t srcSize);
/**
* ZSTD_ldm_skipSequences():
@ -88,12 +86,8 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params);
*/
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
/** ZSTD_ldm_getTableSize() :
* Return prime8bytes^(minMatchLength-1) */
U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
/** ZSTD_ldm_adjustParameters() :
* If the params->hashEveryLog is not set, set it to its default value based on
* If the params->hashRateLog is not set, set it to its default value based on
* windowLog and params->hashLog.
*
* Ensures that params->bucketSizeLog is <= params->hashLog (setting it to

File diff suppressed because it is too large Load diff

View file

@ -17,23 +17,37 @@ extern "C" {
#include "zstd_compress_internal.h"
void ZSTD_updateTree(
ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */
/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
void const* src, size_t srcSize);
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */
#if defined (__cplusplus)
}

File diff suppressed because it is too large Load diff

View file

@ -17,10 +17,25 @@
/* Note : This is an internal API.
* Some methods are still exposed (ZSTDLIB_API),
* These APIs used to be exposed with ZSTDLIB_API,
* because it used to be the only way to invoke MT compression.
* Now, it's recommended to use ZSTD_compress_generic() instead.
* These methods will stop being exposed in a future version */
* Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
* instead.
*
* If you depend on these APIs and can't switch, then define
* ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
* However, we may completely remove these functions in a future
* release, so please switch soon.
*
* This API requires ZSTD_MULTITHREAD to be defined during compilation,
* otherwise ZSTDMT_createCCtx*() will fail.
*/
#ifdef ZSTD_LEGACY_MULTITHREADED_API
# define ZSTDMT_API ZSTDLIB_API
#else
# define ZSTDMT_API
#endif
/* === Dependencies === */
#include <stddef.h> /* size_t */
@ -28,19 +43,32 @@
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
#ifndef ZSTDMT_NBWORKERS_MAX
# define ZSTDMT_NBWORKERS_MAX 200
#endif
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
/* === Memory management === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
ZSTD_customMem cMem);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
/* === Simple one-pass compression function === */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
@ -49,34 +77,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
/* === Streaming functions === */
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
/* === Advanced functions and parameters === */
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
#endif
ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_parameters params,
int overlapLog);
ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_parameters params,
unsigned overlapLog);
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
ZSTD_parameters params,
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
const ZSTD_CDict* cdict,
ZSTD_frameParameters fparams,
unsigned long long pledgedSrcSize); /* note : zero means empty */
@ -84,8 +109,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
/* ZSTDMT_parameter :
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
} ZSTDMT_parameter;
/* ZSTDMT_setMTCtxParameter() :
@ -93,7 +119,12 @@ typedef enum {
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
/* ZSTDMT_getMTCtxParameter() :
* Query the ZSTDMT_CCtx for a parameter value.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
/*! ZSTDMT_compressStream_generic() :
@ -103,7 +134,7 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
* 0 if fully flushed
* or an error code
* note : needs to be init using any ZSTD_initCStream*() variant */
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
@ -114,11 +145,21 @@ ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
* === Not exposed in libzstd. Never invoke directly ===
* ======================================================== */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
/*! ZSTDMT_toFlushNow()
* Tell how many bytes are ready to be flushed immediately.
* Probe the oldest active job (not yet entirely flushed) and check its output buffer.
* If return 0, it means there is no active job,
* or, it means oldest job is still active, but everything produced has been flushed so far,
* therefore flushing is limited by speed of oldest job. */
size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
/* ZSTDMT_CCtxParam_setNbWorkers()
* Set nbWorkers, and clamp it.
* Also reset jobSize and overlapLog */
/*! ZSTDMT_CCtxParam_setMTCtxParameter()
* like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
/*! ZSTDMT_CCtxParam_setNbWorkers()
* Set nbWorkers, and clamp it.
* Also reset jobSize and overlapLog */
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
/*! ZSTDMT_updateCParams_whileCompressing() :
@ -126,14 +167,9 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
* New parameters will be applied to next compression job. */
void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
/* ZSTDMT_getNbWorkers():
* @return nb threads currently active in mtctx.
* mtctx must be valid */
unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
/* ZSTDMT_getFrameProgression():
* tells how much data has been consumed (input) and produced (output) for current frame.
* able to count progression inside worker threads.
/*! ZSTDMT_getFrameProgression():
* tells how much data has been consumed (input) and produced (output) for current frame.
* able to count progression inside worker threads.
*/
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,240 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* zstd_ddict.c :
* concentrates all logic that needs to know the internals of ZSTD_DDict object */
/*-*******************************************************
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
#include "cpu.h" /* bmi2 */
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_decompress_internal.h"
#include "zstd_ddict.h"
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "zstd_legacy.h"
#endif
/*-*******************************************************
* Types
*********************************************************/
struct ZSTD_DDict_s {
void* dictBuffer;
const void* dictContent;
size_t dictSize;
ZSTD_entropyDTables_t entropy;
U32 dictID;
U32 entropyPresent;
ZSTD_customMem cMem;
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
{
assert(ddict != NULL);
return ddict->dictContent;
}
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
{
assert(ddict != NULL);
return ddict->dictSize;
}
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
DEBUGLOG(4, "ZSTD_copyDDictParameters");
assert(dctx != NULL);
assert(ddict != NULL);
dctx->dictID = ddict->dictID;
dctx->prefixStart = ddict->dictContent;
dctx->virtualStart = ddict->dictContent;
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dctx->previousDstEnd = dctx->dictEnd;
if (ddict->entropyPresent) {
dctx->litEntropy = 1;
dctx->fseEntropy = 1;
dctx->LLTptr = ddict->entropy.LLTable;
dctx->MLTptr = ddict->entropy.MLTable;
dctx->OFTptr = ddict->entropy.OFTable;
dctx->HUFptr = ddict->entropy.hufTable;
dctx->entropy.rep[0] = ddict->entropy.rep[0];
dctx->entropy.rep[1] = ddict->entropy.rep[1];
dctx->entropy.rep[2] = ddict->entropy.rep[2];
} else {
dctx->litEntropy = 0;
dctx->fseEntropy = 0;
}
}
static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
ZSTD_dictContentType_e dictContentType)
{
ddict->dictID = 0;
ddict->entropyPresent = 0;
if (dictContentType == ZSTD_dct_rawContent) return 0;
if (ddict->dictSize < 8) {
if (dictContentType == ZSTD_dct_fullDict)
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
return 0; /* pure content mode */
}
{ U32 const magic = MEM_readLE32(ddict->dictContent);
if (magic != ZSTD_MAGIC_DICTIONARY) {
if (dictContentType == ZSTD_dct_fullDict)
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
return 0; /* pure content mode */
}
}
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
/* load entropy tables */
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
&ddict->entropy, ddict->dictContent, ddict->dictSize)),
dictionary_corrupted);
ddict->entropyPresent = 1;
return 0;
}
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType)
{
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
ddict->dictBuffer = NULL;
ddict->dictContent = dict;
if (!dict) dictSize = 0;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
ddict->dictBuffer = internalBuffer;
ddict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
memcpy(internalBuffer, dict, dictSize);
}
ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
return 0;
}
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType,
ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
if (ddict == NULL) return NULL;
ddict->cMem = customMem;
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
dict, dictSize,
dictLoadMethod, dictContentType);
if (ZSTD_isError(initResult)) {
ZSTD_freeDDict(ddict);
return NULL;
} }
return ddict;
}
}
/*! ZSTD_createDDict() :
* Create a digested dictionary, to start decompression without startup delay.
* `dict` content is copied inside DDict.
* Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
}
/*! ZSTD_createDDict_byReference() :
* Create a digested dictionary, to start decompression without startup delay.
* Dictionary content is simply referenced, it will be accessed during decompression.
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
}
const ZSTD_DDict* ZSTD_initStaticDDict(
void* sBuffer, size_t sBufferSize,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType)
{
size_t const neededSpace = sizeof(ZSTD_DDict)
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
assert(sBuffer != NULL);
assert(dict != NULL);
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
if (sBufferSize < neededSpace) return NULL;
if (dictLoadMethod == ZSTD_dlm_byCopy) {
memcpy(ddict+1, dict, dictSize); /* local copy */
dict = ddict+1;
}
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
dict, dictSize,
ZSTD_dlm_byRef, dictContentType) ))
return NULL;
return ddict;
}
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = ddict->cMem;
ZSTD_free(ddict->dictBuffer, cMem);
ZSTD_free(ddict, cMem);
return 0;
}
}
/*! ZSTD_estimateDDictSize() :
* Estimate amount of memory that will be needed to create a dictionary for decompression.
* Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
{
return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
}
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support sizeof on NULL */
return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
}
/*! ZSTD_getDictID_fromDDict() :
* Provides the dictID of the dictionary loaded into `ddict`.
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0;
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
}

View file

@ -0,0 +1,44 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_DDICT_H
#define ZSTD_DDICT_H
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "zstd.h" /* ZSTD_DDict, and several public functions */
/*-*******************************************************
* Interface
*********************************************************/
/* note: several prototypes are already published in `zstd.h` :
* ZSTD_createDDict()
* ZSTD_createDDict_byReference()
* ZSTD_createDDict_advanced()
* ZSTD_freeDDict()
* ZSTD_initStaticDDict()
* ZSTD_sizeof_DDict()
* ZSTD_estimateDDictSize()
* ZSTD_getDictID_fromDict()
*/
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
#endif /* ZSTD_DDICT_H */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_DEC_BLOCK_H
#define ZSTD_DEC_BLOCK_H
/*-*******************************************************
* Dependencies
*********************************************************/
#include <stddef.h> /* size_t */
#include "zstd.h" /* DCtx, and some public functions */
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
/* === Prototypes === */
/* note: prototypes already published within `zstd.h` :
* ZSTD_decompressBlock()
*/
/* note: prototypes already published within `zstd_internal.h` :
* ZSTD_getcBlockSize()
* ZSTD_decodeSeqHeaders()
*/
/* ZSTD_decompressBlock_internal() :
* decompress block, starting at `src`,
* into destination buffer `dst`.
* @return : decompressed block size,
* or an error code (which can be tested using ZSTD_isError())
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame);
/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)
* this function must be called with valid parameters only
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
* in which case it cannot fail.
* Internal use only.
*/
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog);
#endif /* ZSTD_DEC_BLOCK_H */

View file

@ -0,0 +1,175 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* zstd_decompress_internal:
* objects and definitions shared within lib/decompress modules */
#ifndef ZSTD_DECOMPRESS_INTERNAL_H
#define ZSTD_DECOMPRESS_INTERNAL_H
/*-*******************************************************
* Dependencies
*********************************************************/
#include "mem.h" /* BYTE, U16, U32 */
#include "zstd_internal.h" /* ZSTD_seqSymbol */
/*-*******************************************************
* Constants
*********************************************************/
static const U32 LL_base[MaxLL+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 18, 20, 22, 24, 28, 32, 40,
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
0x2000, 0x4000, 0x8000, 0x10000 };
static const U32 OF_base[MaxOff+1] = {
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
static const U32 OF_bits[MaxOff+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31 };
static const U32 ML_base[MaxML+1] = {
3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34,
35, 37, 39, 41, 43, 47, 51, 59,
67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
/*-*******************************************************
* Decompression types
*********************************************************/
typedef struct {
U32 fastMode;
U32 tableLog;
} ZSTD_seqSymbol_header;
typedef struct {
U16 nextState;
BYTE nbAdditionalBits;
BYTE nbBits;
U32 baseValue;
} ZSTD_seqSymbol;
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
} ZSTD_entropyDTables_t;
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
typedef enum { zdss_init=0, zdss_loadHeader,
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
typedef enum {
ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
} ZSTD_dictUses_e;
struct ZSTD_DCtx_s
{
const ZSTD_seqSymbol* LLTptr;
const ZSTD_seqSymbol* MLTptr;
const ZSTD_seqSymbol* OFTptr;
const HUF_DTable* HUFptr;
ZSTD_entropyDTables_t entropy;
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
const void* previousDstEnd; /* detect continuity */
const void* prefixStart; /* start of current segment */
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
const void* dictEnd; /* end of previous segment */
size_t expected;
ZSTD_frameHeader fParams;
U64 decodedSize;
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
ZSTD_dStage stage;
U32 litEntropy;
U32 fseEntropy;
XXH64_state_t xxhState;
size_t headerSize;
ZSTD_format_e format;
const BYTE* litPtr;
ZSTD_customMem customMem;
size_t litSize;
size_t rleSize;
size_t staticSize;
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
/* dictionary */
ZSTD_DDict* ddictLocal;
const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
U32 dictID;
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
ZSTD_dictUses_e dictUses;
/* streaming */
ZSTD_dStreamStage streamStage;
char* inBuff;
size_t inBuffSize;
size_t inPos;
size_t maxWindowSize;
char* outBuff;
size_t outBuffSize;
size_t outStart;
size_t outEnd;
size_t lhSize;
void* legacyContext;
U32 previousLegacyVersion;
U32 legacyVersion;
U32 hostageByte;
int noForwardProgress;
/* workspace */
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
/*-*******************************************************
* Shared internal functions
*********************************************************/
/*! ZSTD_loadDEntropy() :
* dict : must point at beginning of a valid zstd dictionary.
* @return : size of entropy tables read */
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
const void* const dict, size_t const dictSize);
/*! ZSTD_checkContinuity() :
* check if next `dst` follows previous position, where decompression ended.
* If yes, do nothing (continue on current segment).
* If not, classify previous segment as "external dictionary", and start a new segment.
* This function cannot fail. */
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */

View file

@ -36,16 +36,17 @@ extern "C" {
*****************************************************************/
/* Deprecation warnings */
/* Should these warnings be a problem,
it is generally possible to disable them,
typically with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual.
Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
* it is generally possible to disable them,
* typically with -Wno-deprecated-declarations for gcc
* or _CRT_SECURE_NO_WARNINGS in Visual.
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
*/
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ >= 3)
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))

View file

@ -29,6 +29,7 @@
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "cover.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
@ -38,7 +39,8 @@
/*-*************************************
* Constants
***************************************/
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define DEFAULT_SPLITPOINT 1.0
/*-*************************************
* Console display
@ -184,7 +186,7 @@ static void COVER_map_remove(COVER_map_t *map, U32 key) {
}
/**
* Destroyes a map that is inited with COVER_map_init().
* Destroys a map that is inited with COVER_map_init().
*/
static void COVER_map_destroy(COVER_map_t *map) {
if (map->data) {
@ -203,6 +205,8 @@ typedef struct {
size_t *offsets;
const size_t *samplesSizes;
size_t nbSamples;
size_t nbTrainSamples;
size_t nbTestSamples;
U32 *suffix;
size_t suffixSize;
U32 *freqs;
@ -220,9 +224,9 @@ static COVER_ctx_t *g_ctx = NULL;
/**
* Returns the sum of the sample sizes.
*/
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
size_t sum = 0;
size_t i;
unsigned i;
for (i = 0; i < nbSamples; ++i) {
sum += samplesSizes[i];
}
@ -377,14 +381,6 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
ctx->suffix[dmerId] = freq;
}
/**
* A segment is a range in the source as well as the score of the segment.
*/
typedef struct {
U32 begin;
U32 end;
U32 score;
} COVER_segment_t;
/**
* Selects the best segment in an epoch.
@ -395,7 +391,7 @@ typedef struct {
*
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
*
* Once the dmer d is in the dictionay we set F(d) = 0.
* Once the dmer d is in the dictionary we set F(d) = 0.
*/
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
COVER_map_t *activeDmers, U32 begin,
@ -439,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
activeSegment.begin += 1;
*delDmerOcc -= 1;
/* If this is the last occurence of the dmer, subtract its score */
/* If this is the last occurrence of the dmer, subtract its score */
if (*delDmerOcc == 0) {
COVER_map_remove(activeDmers, delDmer);
activeSegment.score -= freqs[delDmer];
@ -494,6 +490,10 @@ static int COVER_checkParameters(ZDICT_cover_params_t parameters,
if (parameters.d > parameters.k) {
return 0;
}
/* 0 < splitPoint <= 1 */
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
return 0;
}
return 1;
}
@ -526,30 +526,49 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
* Returns 1 on success or zero on error.
* Returns 0 on success or error code on error.
* The context must be destroyed with `COVER_ctx_destroy()`.
*/
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
unsigned d) {
unsigned d, double splitPoint) {
const BYTE *const samples = (const BYTE *)samplesBuffer;
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
/* Split samples into testing and training sets */
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
/* Checks */
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
return 0;
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
return ERROR(srcSize_wrong);
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
(U32)totalSamplesSize);
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
(unsigned)trainingSamplesSize);
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
(unsigned)testSamplesSize);
ctx->samples = samples;
ctx->samplesSizes = samplesSizes;
ctx->nbSamples = nbSamples;
ctx->nbTrainSamples = nbTrainSamples;
ctx->nbTestSamples = nbTestSamples;
/* Partial suffix array */
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
/* Maps index to the dmerID */
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@ -558,12 +577,12 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
COVER_ctx_destroy(ctx);
return 0;
return ERROR(memory_allocation);
}
ctx->freqs = NULL;
ctx->d = d;
/* Fill offsets from the samlesSizes */
/* Fill offsets from the samplesSizes */
{
U32 i;
ctx->offsets[0] = 0;
@ -581,10 +600,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
for (i = 0; i < ctx->suffixSize; ++i) {
ctx->suffix[i] = i;
}
/* qsort doesn't take an opaque pointer, so pass as a global */
/* qsort doesn't take an opaque pointer, so pass as a global.
* On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
*/
g_ctx = ctx;
#if defined(__OpenBSD__)
mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#else
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
#endif
}
DISPLAYLEVEL(2, "Computing frequencies\n");
/* For each dmer group (group of positions with the same first d bytes):
@ -598,7 +624,40 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
ctx->freqs = ctx->suffix;
ctx->suffix = NULL;
return 1;
return 0;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
{
const double ratio = (double)nbDmers / maxDictSize;
if (ratio >= 10) {
return;
}
LOCALDISPLAYLEVEL(displayLevel, 1,
"WARNING: The maximum dictionary size %u is too large "
"compared to the source size %u! "
"size(source)/size(dictionary) = %f, but it should be >= "
"10! This may lead to a subpar dictionary! We recommend "
"training on sources at least 10x, and preferably 100x "
"the size of the dictionary! \n", (U32)maxDictSize,
(U32)nbDmers, ratio);
}
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
U32 nbDmers, U32 k, U32 passes)
{
const U32 minEpochSize = k * 10;
COVER_epoch_info_t epochs;
epochs.num = MAX(1, maxDictSize / k / passes);
epochs.size = nbDmers / epochs.num;
if (epochs.size >= minEpochSize) {
assert(epochs.size * epochs.num <= nbDmers);
return epochs;
}
epochs.size = MIN(minEpochSize, nbDmers);
epochs.num = nbDmers / epochs.size;
assert(epochs.size * epochs.num <= nbDmers);
return epochs;
}
/**
@ -610,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
ZDICT_cover_params_t parameters) {
BYTE *const dict = (BYTE *)dictBuffer;
size_t tail = dictBufferCapacity;
/* Divide the data up into epochs of equal size.
* We will select at least one segment from each epoch.
*/
const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
const U32 epochSize = (U32)(ctx->suffixSize / epochs);
/* Divide the data into epochs. We will select one segment from each epoch. */
const COVER_epoch_info_t epochs = COVER_computeEpochs(
(U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
size_t zeroScoreRun = 0;
size_t epoch;
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
epochSize);
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
(U32)epochs.num, (U32)epochs.size);
/* Loop through the epochs until there are no more segments or the dictionary
* is full.
*/
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
const U32 epochBegin = (U32)(epoch * epochSize);
const U32 epochEnd = epochBegin + epochSize;
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
const U32 epochBegin = (U32)(epoch * epochs.size);
const U32 epochEnd = epochBegin + epochs.size;
size_t segmentSize;
/* Select a segment */
COVER_segment_t segment = COVER_selectSegment(
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
/* If the segment covers no dmers, then we are out of content */
/* If the segment covers no dmers, then we are out of content.
* There may be new content in other epochs, for continue for some time.
*/
if (segment.score == 0) {
break;
if (++zeroScoreRun >= maxZeroScoreRun) {
break;
}
continue;
}
zeroScoreRun = 0;
/* Trim the segment if necessary and if it is too small then we are done */
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
if (segmentSize < parameters.d) {
@ -644,7 +709,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
DISPLAYUPDATE(
2, "\r%u%% ",
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
(unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
}
DISPLAYLEVEL(2, "\r%79s\r", "");
return tail;
@ -658,17 +723,17 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
BYTE* const dict = (BYTE*)dictBuffer;
COVER_ctx_t ctx;
COVER_map_t activeDmers;
parameters.splitPoint = 1.0;
/* Initialize global data */
g_displayLevel = parameters.zParams.notificationLevel;
/* Checks */
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -676,14 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
return ERROR(dstSize_tooSmall);
}
/* Initialize context and activeDmers */
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d)) {
return ERROR(GENERIC);
{
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d, parameters.splitPoint);
if (ZSTD_isError(initVal)) {
return initVal;
}
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
COVER_ctx_destroy(&ctx);
return ERROR(GENERIC);
return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Building dictionary\n");
@ -696,7 +765,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
if (!ZSTD_isError(dictionarySize)) {
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
(U32)dictionarySize);
(unsigned)dictionarySize);
}
COVER_ctx_destroy(&ctx);
COVER_map_destroy(&activeDmers);
@ -704,28 +773,65 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
}
}
/**
* COVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
*
* All of the methods except COVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
*/
typedef struct COVER_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
const size_t *samplesSizes, const BYTE *samples,
size_t *offsets,
size_t nbTrainSamples, size_t nbSamples,
BYTE *const dict, size_t dictBufferCapacity) {
size_t totalCompressedSize = ERROR(GENERIC);
/* Pointers */
ZSTD_CCtx *cctx;
ZSTD_CDict *cdict;
void *dst;
/* Local variables */
size_t dstCapacity;
size_t i;
/* Allocate dst with enough space to compress the maximum sized sample */
{
size_t maxSampleSize = 0;
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
for (; i < nbSamples; ++i) {
maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
}
dstCapacity = ZSTD_compressBound(maxSampleSize);
dst = malloc(dstCapacity);
}
/* Create the cctx and cdict */
cctx = ZSTD_createCCtx();
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
parameters.zParams.compressionLevel);
if (!dst || !cctx || !cdict) {
goto _compressCleanup;
}
/* Compress each sample and sum their sizes (or error) */
totalCompressedSize = dictBufferCapacity;
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
for (; i < nbSamples; ++i) {
const size_t size = ZSTD_compress_usingCDict(
cctx, dst, dstCapacity, samples + offsets[i],
samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
totalCompressedSize = size;
goto _compressCleanup;
}
totalCompressedSize += size;
}
_compressCleanup:
ZSTD_freeCCtx(cctx);
ZSTD_freeCDict(cdict);
if (dst) {
free(dst);
}
return totalCompressedSize;
}
/**
* Initialize the `COVER_best_t`.
*/
static void COVER_best_init(COVER_best_t *best) {
void COVER_best_init(COVER_best_t *best) {
if (best==NULL) return; /* compatible with init on NULL */
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
@ -739,7 +845,7 @@ static void COVER_best_init(COVER_best_t *best) {
/**
* Wait until liveJobs == 0.
*/
static void COVER_best_wait(COVER_best_t *best) {
void COVER_best_wait(COVER_best_t *best) {
if (!best) {
return;
}
@ -753,7 +859,7 @@ static void COVER_best_wait(COVER_best_t *best) {
/**
* Call COVER_best_wait() and then destroy the COVER_best_t.
*/
static void COVER_best_destroy(COVER_best_t *best) {
void COVER_best_destroy(COVER_best_t *best) {
if (!best) {
return;
}
@ -769,7 +875,7 @@ static void COVER_best_destroy(COVER_best_t *best) {
* Called when a thread is about to be launched.
* Increments liveJobs.
*/
static void COVER_best_start(COVER_best_t *best) {
void COVER_best_start(COVER_best_t *best) {
if (!best) {
return;
}
@ -783,9 +889,11 @@ static void COVER_best_start(COVER_best_t *best) {
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
ZDICT_cover_params_t parameters, void *dict,
size_t dictSize) {
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection) {
void* dict = selection.dictContent;
size_t compressedSize = selection.totalCompressedSize;
size_t dictSize = selection.dictSize;
if (!best) {
return;
}
@ -805,19 +913,128 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
if (!best->dict) {
best->compressedSize = ERROR(GENERIC);
best->dictSize = 0;
ZSTD_pthread_cond_signal(&best->cond);
ZSTD_pthread_mutex_unlock(&best->mutex);
return;
}
}
/* Save the dictionary, parameters, and size */
memcpy(best->dict, dict, dictSize);
best->dictSize = dictSize;
best->parameters = parameters;
best->compressedSize = compressedSize;
if (dict) {
memcpy(best->dict, dict, dictSize);
best->dictSize = dictSize;
best->parameters = parameters;
best->compressedSize = compressedSize;
}
}
ZSTD_pthread_mutex_unlock(&best->mutex);
if (liveJobs == 0) {
ZSTD_pthread_cond_broadcast(&best->cond);
}
ZSTD_pthread_mutex_unlock(&best->mutex);
}
}
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
COVER_dictSelection_t selection = { NULL, 0, error };
return selection;
}
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
}
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
free(selection.dictContent);
}
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
size_t largestDict = 0;
size_t largestCompressed = 0;
BYTE* customDictContentEnd = customDictContent + dictContentSize;
BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
if (!largestDictbuffer || !candidateDictBuffer) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
/* Initial dictionary size and compressed size */
memcpy(largestDictbuffer, customDictContent, dictContentSize);
dictContentSize = ZDICT_finalizeDictionary(
largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
samplesBuffer, offsets,
nbCheckSamples, nbSamples,
largestDictbuffer, dictContentSize);
if (ZSTD_isError(totalCompressedSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
if (params.shrinkDict == 0) {
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
free(candidateDictBuffer);
return selection;
}
largestDict = dictContentSize;
largestCompressed = totalCompressedSize;
dictContentSize = ZDICT_DICTSIZE_MIN;
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
while (dictContentSize < largestDict) {
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
dictContentSize = ZDICT_finalizeDictionary(
candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
if (ZDICT_isError(dictContentSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(dictContentSize);
}
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
samplesBuffer, offsets,
nbCheckSamples, nbSamples,
candidateDictBuffer, dictContentSize);
if (ZSTD_isError(totalCompressedSize)) {
free(largestDictbuffer);
free(candidateDictBuffer);
return COVER_dictSelectionError(totalCompressedSize);
}
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
free(largestDictbuffer);
return selection;
}
dictContentSize *= 2;
}
dictContentSize = largestDict;
totalCompressedSize = largestCompressed;
{
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
free(candidateDictBuffer);
return selection;
}
}
@ -832,7 +1049,7 @@ typedef struct COVER_tryParameters_data_s {
} COVER_tryParameters_data_t;
/**
* Tries a set of parameters and upates the COVER_best_t with the results.
* Tries a set of parameters and updates the COVER_best_t with the results.
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
@ -846,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
/* Allocate space for hash table, dict, and freqs */
COVER_map_t activeDmers;
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@ -861,68 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
{
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
dictBufferCapacity = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
parameters.zParams);
if (ZDICT_isError(dictBufferCapacity)) {
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
if (COVER_dictSelectionIsError(selection)) {
DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
/* Check total compressed size */
{
/* Pointers */
ZSTD_CCtx *cctx;
ZSTD_CDict *cdict;
void *dst;
/* Local variables */
size_t dstCapacity;
size_t i;
/* Allocate dst with enough space to compress the maximum sized sample */
{
size_t maxSampleSize = 0;
for (i = 0; i < ctx->nbSamples; ++i) {
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
}
dstCapacity = ZSTD_compressBound(maxSampleSize);
dst = malloc(dstCapacity);
}
/* Create the cctx and cdict */
cctx = ZSTD_createCCtx();
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
parameters.zParams.compressionLevel);
if (!dst || !cctx || !cdict) {
goto _compressCleanup;
}
/* Compress each sample and sum their sizes (or error) */
totalCompressedSize = dictBufferCapacity;
for (i = 0; i < ctx->nbSamples; ++i) {
const size_t size = ZSTD_compress_usingCDict(
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
ctx->samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
totalCompressedSize = ERROR(GENERIC);
goto _compressCleanup;
}
totalCompressedSize += size;
}
_compressCleanup:
ZSTD_freeCCtx(cctx);
ZSTD_freeCDict(cdict);
if (dst) {
free(dst);
}
}
_cleanup:
COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
dictBufferCapacity);
free(dict);
COVER_best_finish(data->best, parameters, selection);
free(data);
COVER_map_destroy(&activeDmers);
if (dict) {
free(dict);
}
COVER_dictSelectionFree(selection);
if (freqs) {
free(freqs);
}
@ -934,6 +1105,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
ZDICT_cover_params_t *parameters) {
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@ -942,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
@ -949,15 +1123,20 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
unsigned k;
COVER_best_t best;
POOL_ctx *pool = NULL;
int warned = 0;
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
return ERROR(GENERIC);
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
return ERROR(GENERIC);
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -981,11 +1160,18 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Initialize the context for this value of d */
COVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return ERROR(GENERIC);
{
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
if (ZSTD_isError(initVal)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return initVal;
}
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
warned = 1;
}
/* Loop through k reusing the same context */
for (k = kMinK; k <= kMaxK; k += kStepSize) {
@ -998,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
COVER_best_destroy(&best);
COVER_ctx_destroy(&ctx);
POOL_free(pool);
return ERROR(GENERIC);
return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
@ -1006,7 +1192,9 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
data->parameters = *parameters;
data->parameters.k = k;
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
@ -1023,7 +1211,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
}
/* Print status */
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
(U32)((iteration * 100) / kIterations));
(unsigned)((iteration * 100) / kIterations));
++iteration;
}
COVER_best_wait(&best);

View file

@ -0,0 +1,147 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
/**
* COVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
*
* All of the methods except COVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
*/
typedef struct COVER_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;
/**
* A segment is a range in the source as well as the score of the segment.
*/
typedef struct {
U32 begin;
U32 end;
U32 score;
} COVER_segment_t;
/**
*Number of epochs and size of each epoch.
*/
typedef struct {
U32 num;
U32 size;
} COVER_epoch_info_t;
/**
* Struct used for the dictionary selection function.
*/
typedef struct COVER_dictSelection {
BYTE* dictContent;
size_t dictSize;
size_t totalCompressedSize;
} COVER_dictSelection_t;
/**
* Computes the number of epochs and the size of each epoch.
* We will make sure that each epoch gets at least 10 * k bytes.
*
* The COVER algorithms divide the data up into epochs of equal size and
* select one segment from each epoch.
*
* @param maxDictSize The maximum allowed dictionary size.
* @param nbDmers The number of dmers we are training on.
* @param k The parameter k (segment size).
* @param passes The target number of passes over the dmer corpus.
* More passes means a better dictionary.
*/
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
U32 k, U32 passes);
/**
* Warns the user when their corpus is too small.
*/
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
/**
* Checks total compressed size of a dictionary
*/
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
const size_t *samplesSizes, const BYTE *samples,
size_t *offsets,
size_t nbTrainSamples, size_t nbSamples,
BYTE *const dict, size_t dictBufferCapacity);
/**
* Returns the sum of the sample sizes.
*/
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
/**
* Initialize the `COVER_best_t`.
*/
void COVER_best_init(COVER_best_t *best);
/**
* Wait until liveJobs == 0.
*/
void COVER_best_wait(COVER_best_t *best);
/**
* Call COVER_best_wait() and then destroy the COVER_best_t.
*/
void COVER_best_destroy(COVER_best_t *best);
/**
* Called when a thread is about to be launched.
* Increments liveJobs.
*/
void COVER_best_start(COVER_best_t *best);
/**
* Called when a thread finishes executing, both on error or success.
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection);
/**
* Error function for COVER_selectDict function. Checks if the return
* value is an error.
*/
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
/**
* Error function for COVER_selectDict function. Returns a struct where
* return.totalCompressedSize is a ZSTD error.
*/
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
/**
* Always call after selectDict is called to free up used memory from
* newly created dictionary.
*/
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
/**
* Called to finalize the dictionary and select one based on whether or not
* the shrink-dict flag was enabled. If enabled the dictionary used is the
* smallest dictionary within a specified regression of the compressed size
* from the largest dictionary.
*/
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);

View file

@ -1637,7 +1637,7 @@ construct_SA(const unsigned char *T, int *SA,
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
assert(k < j); assert(k != NULL);
*k-- = s;
} else {
assert(((s == 0) && (T[s] == c1)) || (s < 0));
@ -1701,7 +1701,7 @@ construct_BWT(const unsigned char *T, int *SA,
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
assert(k < j); assert(k != NULL);
*k-- = s;
} else if(s != 0) {
*j = ~s;
@ -1785,7 +1785,7 @@ construct_BWT_indexes(const unsigned char *T, int *SA,
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
assert(k < j); assert(k != NULL);
*k-- = s;
} else if(s != 0) {
*j = ~s;

View file

@ -0,0 +1,747 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "cover.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
/*-*************************************
* Constants
***************************************/
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
#define FASTCOVER_MAX_F 31
#define FASTCOVER_MAX_ACCEL 10
#define DEFAULT_SPLITPOINT 0.75
#define DEFAULT_F 20
#define DEFAULT_ACCEL 1
/*-*************************************
* Console display
***************************************/
static int g_displayLevel = 2;
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
/*-*************************************
* Hash Functions
***************************************/
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
/**
* Hash the d-byte value pointed to by p and mod 2^f
*/
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
if (d == 6) {
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
}
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
}
/*-*************************************
* Acceleration
***************************************/
typedef struct {
unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */
unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */
} FASTCOVER_accel_t;
static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = {
{ 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */
{ 100, 0 }, /* accel = 1 */
{ 50, 1 }, /* accel = 2 */
{ 34, 2 }, /* accel = 3 */
{ 25, 3 }, /* accel = 4 */
{ 20, 4 }, /* accel = 5 */
{ 17, 5 }, /* accel = 6 */
{ 14, 6 }, /* accel = 7 */
{ 13, 7 }, /* accel = 8 */
{ 11, 8 }, /* accel = 9 */
{ 10, 9 }, /* accel = 10 */
};
/*-*************************************
* Context
***************************************/
typedef struct {
const BYTE *samples;
size_t *offsets;
const size_t *samplesSizes;
size_t nbSamples;
size_t nbTrainSamples;
size_t nbTestSamples;
size_t nbDmers;
U32 *freqs;
unsigned d;
unsigned f;
FASTCOVER_accel_t accelParams;
} FASTCOVER_ctx_t;
/*-*************************************
* Helper functions
***************************************/
/**
* Selects the best segment in an epoch.
* Segments of are scored according to the function:
*
* Let F(d) be the frequency of all dmers with hash value d.
* Let S_i be hash value of the dmer at position i of segment S which has length k.
*
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
*
* Once the dmer with hash value d is in the dictionary we set F(d) = 0.
*/
static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
U32 *freqs, U32 begin, U32 end,
ZDICT_cover_params_t parameters,
U16* segmentFreqs) {
/* Constants */
const U32 k = parameters.k;
const U32 d = parameters.d;
const U32 f = ctx->f;
const U32 dmersInK = k - d + 1;
/* Try each segment (activeSegment) and save the best (bestSegment) */
COVER_segment_t bestSegment = {0, 0, 0};
COVER_segment_t activeSegment;
/* Reset the activeDmers in the segment */
/* The activeSegment starts at the beginning of the epoch. */
activeSegment.begin = begin;
activeSegment.end = begin;
activeSegment.score = 0;
/* Slide the activeSegment through the whole epoch.
* Save the best segment in bestSegment.
*/
while (activeSegment.end < end) {
/* Get hash value of current dmer */
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
if (segmentFreqs[idx] == 0) {
activeSegment.score += freqs[idx];
}
/* Increment end of segment and segmentFreqs*/
activeSegment.end += 1;
segmentFreqs[idx] += 1;
/* If the window is now too large, drop the first position */
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
/* Get hash value of the dmer to be eliminated from active segment */
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
segmentFreqs[delIndex] -= 1;
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
if (segmentFreqs[delIndex] == 0) {
activeSegment.score -= freqs[delIndex];
}
/* Increment start of segment */
activeSegment.begin += 1;
}
/* If this segment is the best so far save it */
if (activeSegment.score > bestSegment.score) {
bestSegment = activeSegment;
}
}
/* Zero out rest of segmentFreqs array */
while (activeSegment.begin < end) {
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
segmentFreqs[delIndex] -= 1;
activeSegment.begin += 1;
}
{
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
U32 pos;
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
freqs[i] = 0;
}
}
return bestSegment;
}
static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,
size_t maxDictSize, unsigned f,
unsigned accel) {
/* k, d, and f are required parameters */
if (parameters.d == 0 || parameters.k == 0) {
return 0;
}
/* d has to be 6 or 8 */
if (parameters.d != 6 && parameters.d != 8) {
return 0;
}
/* k <= maxDictSize */
if (parameters.k > maxDictSize) {
return 0;
}
/* d <= k */
if (parameters.d > parameters.k) {
return 0;
}
/* 0 < f <= FASTCOVER_MAX_F*/
if (f > FASTCOVER_MAX_F || f == 0) {
return 0;
}
/* 0 < splitPoint <= 1 */
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
return 0;
}
/* 0 < accel <= 10 */
if (accel > 10 || accel == 0) {
return 0;
}
return 1;
}
/**
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
*/
static void
FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)
{
if (!ctx) return;
free(ctx->freqs);
ctx->freqs = NULL;
free(ctx->offsets);
ctx->offsets = NULL;
}
/**
* Calculate for frequency of hash value of each dmer in ctx->samples
*/
static void
FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
{
const unsigned f = ctx->f;
const unsigned d = ctx->d;
const unsigned skip = ctx->accelParams.skip;
const unsigned readLength = MAX(d, 8);
size_t i;
assert(ctx->nbTrainSamples >= 5);
assert(ctx->nbTrainSamples <= ctx->nbSamples);
for (i = 0; i < ctx->nbTrainSamples; i++) {
size_t start = ctx->offsets[i]; /* start of current dmer */
size_t const currSampleEnd = ctx->offsets[i+1];
while (start + readLength <= currSampleEnd) {
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);
freqs[dmerIndex]++;
start = start + skip + 1;
}
}
}
/**
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
* Returns 0 on success or error code on error.
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
*/
static size_t
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
unsigned d, double splitPoint, unsigned f,
FASTCOVER_accel_t accelParams)
{
const BYTE* const samples = (const BYTE*)samplesBuffer;
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
/* Split samples into testing and training sets */
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
/* Checks */
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
return ERROR(srcSize_wrong);
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
(unsigned)trainingSamplesSize);
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
(unsigned)testSamplesSize);
ctx->samples = samples;
ctx->samplesSizes = samplesSizes;
ctx->nbSamples = nbSamples;
ctx->nbTrainSamples = nbTrainSamples;
ctx->nbTestSamples = nbTestSamples;
ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
ctx->d = d;
ctx->f = f;
ctx->accelParams = accelParams;
/* The offsets of each file */
ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
if (ctx->offsets == NULL) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
FASTCOVER_ctx_destroy(ctx);
return ERROR(memory_allocation);
}
/* Fill offsets from the samplesSizes */
{ U32 i;
ctx->offsets[0] = 0;
assert(nbSamples >= 5);
for (i = 1; i <= nbSamples; ++i) {
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
}
}
/* Initialize frequency array of size 2^f */
ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));
if (ctx->freqs == NULL) {
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
FASTCOVER_ctx_destroy(ctx);
return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, ctx);
return 0;
}
/**
* Given the prepared context build the dictionary.
*/
static size_t
FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
U32* freqs,
void* dictBuffer, size_t dictBufferCapacity,
ZDICT_cover_params_t parameters,
U16* segmentFreqs)
{
BYTE *const dict = (BYTE *)dictBuffer;
size_t tail = dictBufferCapacity;
/* Divide the data into epochs. We will select one segment from each epoch. */
const COVER_epoch_info_t epochs = COVER_computeEpochs(
(U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
const size_t maxZeroScoreRun = 10;
size_t zeroScoreRun = 0;
size_t epoch;
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
(U32)epochs.num, (U32)epochs.size);
/* Loop through the epochs until there are no more segments or the dictionary
* is full.
*/
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
const U32 epochBegin = (U32)(epoch * epochs.size);
const U32 epochEnd = epochBegin + epochs.size;
size_t segmentSize;
/* Select a segment */
COVER_segment_t segment = FASTCOVER_selectSegment(
ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
/* If the segment covers no dmers, then we are out of content.
* There may be new content in other epochs, for continue for some time.
*/
if (segment.score == 0) {
if (++zeroScoreRun >= maxZeroScoreRun) {
break;
}
continue;
}
zeroScoreRun = 0;
/* Trim the segment if necessary and if it is too small then we are done */
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
if (segmentSize < parameters.d) {
break;
}
/* We fill the dictionary from the back to allow the best segments to be
* referenced with the smallest offsets.
*/
tail -= segmentSize;
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
DISPLAYUPDATE(
2, "\r%u%% ",
(unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
}
DISPLAYLEVEL(2, "\r%79s\r", "");
return tail;
}
/**
* Parameters for FASTCOVER_tryParameters().
*/
typedef struct FASTCOVER_tryParameters_data_s {
const FASTCOVER_ctx_t* ctx;
COVER_best_t* best;
size_t dictBufferCapacity;
ZDICT_cover_params_t parameters;
} FASTCOVER_tryParameters_data_t;
/**
* Tries a set of parameters and updates the COVER_best_t with the results.
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
static void FASTCOVER_tryParameters(void *opaque)
{
/* Save parameters as local variables */
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
const FASTCOVER_ctx_t *const ctx = data->ctx;
const ZDICT_cover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Initialize array to keep track of frequency of dmer within activeSegment */
U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
/* Allocate space for hash table, dict, and freqs */
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
if (!segmentFreqs || !dict || !freqs) {
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
goto _cleanup;
}
/* Copy the frequencies because we need to modify them */
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
/* Build the dictionary */
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
parameters, segmentFreqs);
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
totalCompressedSize);
if (COVER_dictSelectionIsError(selection)) {
DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
_cleanup:
free(dict);
COVER_best_finish(data->best, parameters, selection);
free(data);
free(segmentFreqs);
COVER_dictSelectionFree(selection);
free(freqs);
}
static void
FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
ZDICT_cover_params_t* coverParams)
{
coverParams->k = fastCoverParams.k;
coverParams->d = fastCoverParams.d;
coverParams->steps = fastCoverParams.steps;
coverParams->nbThreads = fastCoverParams.nbThreads;
coverParams->splitPoint = fastCoverParams.splitPoint;
coverParams->zParams = fastCoverParams.zParams;
coverParams->shrinkDict = fastCoverParams.shrinkDict;
}
static void
FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
ZDICT_fastCover_params_t* fastCoverParams,
unsigned f, unsigned accel)
{
fastCoverParams->k = coverParams.k;
fastCoverParams->d = coverParams.d;
fastCoverParams->steps = coverParams.steps;
fastCoverParams->nbThreads = coverParams.nbThreads;
fastCoverParams->splitPoint = coverParams.splitPoint;
fastCoverParams->f = f;
fastCoverParams->accel = accel;
fastCoverParams->zParams = coverParams.zParams;
fastCoverParams->shrinkDict = coverParams.shrinkDict;
}
ZDICTLIB_API size_t
ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t parameters)
{
BYTE* const dict = (BYTE*)dictBuffer;
FASTCOVER_ctx_t ctx;
ZDICT_cover_params_t coverParams;
FASTCOVER_accel_t accelParams;
/* Initialize global data */
g_displayLevel = parameters.zParams.notificationLevel;
/* Assign splitPoint and f if not provided */
parameters.splitPoint = 1.0;
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;
/* Convert to cover parameter */
memset(&coverParams, 0 , sizeof(coverParams));
FASTCOVER_convertToCoverParams(parameters, &coverParams);
/* Checks */
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
parameters.accel)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
ZDICT_DICTSIZE_MIN);
return ERROR(dstSize_tooSmall);
}
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
/* Initialize context */
{
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
coverParams.d, parameters.splitPoint, parameters.f,
accelParams);
if (ZSTD_isError(initVal)) {
DISPLAYLEVEL(1, "Failed to initialize context\n");
return initVal;
}
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
/* Build the dictionary */
DISPLAYLEVEL(2, "Building dictionary\n");
{
/* Initialize array to keep track of frequency of dmer within activeSegment */
U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16));
const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
dictBufferCapacity, coverParams, segmentFreqs);
const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
const size_t dictionarySize = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
if (!ZSTD_isError(dictionarySize)) {
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
(unsigned)dictionarySize);
}
FASTCOVER_ctx_destroy(&ctx);
free(segmentFreqs);
return dictionarySize;
}
}
ZDICTLIB_API size_t
ZDICT_optimizeTrainFromBuffer_fastCover(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters)
{
ZDICT_cover_params_t coverParams;
FASTCOVER_accel_t accelParams;
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
unsigned d;
unsigned k;
COVER_best_t best;
POOL_ctx *pool = NULL;
int warned = 0;
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
return ERROR(parameter_outOfBound);
}
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
ZDICT_DICTSIZE_MIN);
return ERROR(dstSize_tooSmall);
}
if (nbThreads > 1) {
pool = POOL_create(nbThreads, 1);
if (!pool) {
return ERROR(memory_allocation);
}
}
/* Initialization */
COVER_best_init(&best);
memset(&coverParams, 0 , sizeof(coverParams));
FASTCOVER_convertToCoverParams(*parameters, &coverParams);
accelParams = FASTCOVER_defaultAccelParameters[accel];
/* Turn down global display level to clean up display at level 2 and below */
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
/* Loop through d first because each new value needs a new context */
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
kIterations);
for (d = kMinD; d <= kMaxD; d += 2) {
/* Initialize the context for this value of d */
FASTCOVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
{
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
if (ZSTD_isError(initVal)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
COVER_best_destroy(&best);
POOL_free(pool);
return initVal;
}
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
warned = 1;
}
/* Loop through k reusing the same context */
for (k = kMinK; k <= kMaxK; k += kStepSize) {
/* Prepare the arguments */
FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
sizeof(FASTCOVER_tryParameters_data_t));
LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
if (!data) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
COVER_best_destroy(&best);
FASTCOVER_ctx_destroy(&ctx);
POOL_free(pool);
return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
data->dictBufferCapacity = dictBufferCapacity;
data->parameters = coverParams;
data->parameters.k = k;
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
data->ctx->f, accel)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
free(data);
continue;
}
/* Call the function and pass ownership of data to it */
COVER_best_start(&best);
if (pool) {
POOL_add(pool, &FASTCOVER_tryParameters, data);
} else {
FASTCOVER_tryParameters(data);
}
/* Print status */
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
(unsigned)((iteration * 100) / kIterations));
++iteration;
}
COVER_best_wait(&best);
FASTCOVER_ctx_destroy(&ctx);
}
LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
/* Fill the output buffer and parameters with output of the best parameters */
{
const size_t dictSize = best.dictSize;
if (ZSTD_isError(best.compressedSize)) {
const size_t compressedSize = best.compressedSize;
COVER_best_destroy(&best);
POOL_free(pool);
return compressedSize;
}
FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
memcpy(dictBuffer, best.dict, dictSize);
COVER_best_destroy(&best);
POOL_free(pool);
return dictSize;
}
}

View file

@ -255,15 +255,15 @@ static dictItem ZDICT_analyzePos(
}
{ int i;
U32 searchLength;
U32 mml;
U32 refinedStart = start;
U32 refinedEnd = end;
DISPLAYLEVEL(4, "\n");
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
DISPLAYLEVEL(4, "\n");
for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
for (mml = MINMATCHLENGTH ; ; mml++) {
BYTE currentChar = 0;
U32 currentCount = 0;
U32 currentID = refinedStart;
@ -271,13 +271,13 @@ static dictItem ZDICT_analyzePos(
U32 selectedCount = 0;
U32 selectedID = currentID;
for (id =refinedStart; id < refinedEnd; id++) {
if (b[suffix[id] + searchLength] != currentChar) {
if (b[suffix[id] + mml] != currentChar) {
if (currentCount > selectedCount) {
selectedCount = currentCount;
selectedID = currentID;
}
currentID = id;
currentChar = b[ suffix[id] + searchLength];
currentChar = b[ suffix[id] + mml];
currentCount = 0;
}
currentCount ++;
@ -293,7 +293,7 @@ static dictItem ZDICT_analyzePos(
refinedEnd = refinedStart + selectedCount;
}
/* evaluate gain based on new ref */
/* evaluate gain based on new dict */
start = refinedStart;
pos = suffix[refinedStart];
end = start;
@ -341,8 +341,8 @@ static dictItem ZDICT_analyzePos(
for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
(U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
solution.pos = (U32)pos;
solution.length = (U32)maxLength;
@ -497,7 +497,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
const size_t* fileSizes, unsigned nbFiles,
U32 minRatio, U32 notificationLevel)
unsigned minRatio, U32 notificationLevel)
{
int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
int* const suffix = suffix0+1;
@ -523,11 +523,11 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
memset(doneMarks, 0, bufferSize+16);
/* limit sample set size (divsufsort limitation)*/
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
/* sort */
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
{ int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
}
@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U;
unsigned acc = prime1;
size_t p=0;;
size_t p=0;
for (p=0; p<length; p++) {
acc *= prime2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
@ -581,7 +581,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
typedef struct
{
ZSTD_CCtx* ref; /* contains reference to dictionary */
ZSTD_CDict* dict; /* dictionary */
ZSTD_CCtx* zc; /* working context */
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
} EStats_ress_t;
@ -589,7 +589,7 @@ typedef struct
#define MAXREPOFFSET 1024
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
const void* src, size_t srcSize,
U32 notificationLevel)
{
@ -597,11 +597,12 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
{ size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
}
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
if (cSize) { /* if == 0; block is not compressible */
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
@ -670,7 +671,7 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
* rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
* necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
*/
static void ZDICT_flatLit(U32* countLit)
static void ZDICT_flatLit(unsigned* countLit)
{
int u;
for (u=1; u<256; u++) countLit[u] = 2;
@ -686,18 +687,18 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
const void* dictBuffer, size_t dictBufferSize,
unsigned notificationLevel)
{
U32 countLit[256];
unsigned countLit[256];
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
U32 offcodeCount[OFFCODE_MAX+1];
unsigned offcodeCount[OFFCODE_MAX+1];
short offcodeNCount[OFFCODE_MAX+1];
U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
U32 matchLengthCount[MaxML+1];
unsigned matchLengthCount[MaxML+1];
short matchLengthNCount[MaxML+1];
U32 litLengthCount[MaxLL+1];
unsigned litLengthCount[MaxLL+1];
short litLengthNCount[MaxLL+1];
U32 repOffset[MAXREPOFFSET];
offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
EStats_ress_t esr;
EStats_ress_t esr = { NULL, NULL, NULL };
ZSTD_parameters params;
U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
size_t pos = 0, errorCode;
@ -708,14 +709,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* init */
DEBUGLOG(4, "ZDICT_analyzeEntropy");
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
if (!esr.ref || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@ -724,14 +717,17 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
memset(repOffset, 0, sizeof(repOffset));
repOffset[1] = repOffset[4] = repOffset[8] = 1;
memset(bestRepOffset, 0, sizeof(bestRepOffset));
if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
if (ZSTD_isError(beginResult)) {
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
eSize = ERROR(GENERIC);
goto _cleanup;
} }
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
if (!esr.dict || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
/* collect stats on all samples */
for (u=0; u<nbFiles; u++) {
@ -745,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* analyze, build stats, starting with literals */
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(maxNbBits)) {
eSize = ERROR(GENERIC);
eSize = maxNbBits;
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
goto _cleanup;
}
@ -768,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
goto _cleanup;
}
@ -777,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
goto _cleanup;
}
@ -786,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
goto _cleanup;
}
@ -795,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* write result to buffer */
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
if (HUF_isError(hhSize)) {
eSize = ERROR(GENERIC);
eSize = hhSize;
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
goto _cleanup;
}
@ -806,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
if (FSE_isError(ohSize)) {
eSize = ERROR(GENERIC);
eSize = ohSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
goto _cleanup;
}
@ -817,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
if (FSE_isError(mhSize)) {
eSize = ERROR(GENERIC);
eSize = mhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
goto _cleanup;
}
@ -828,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
if (FSE_isError(lhSize)) {
eSize = ERROR(GENERIC);
eSize = lhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
goto _cleanup;
}
@ -838,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
}
if (maxDstSize<12) {
eSize = ERROR(GENERIC);
eSize = ERROR(dstSize_tooSmall);
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
goto _cleanup;
}
@ -856,7 +852,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
eSize += 12;
_cleanup:
ZSTD_freeCCtx(esr.ref);
ZSTD_freeCDict(esr.dict);
ZSTD_freeCCtx(esr.zc);
free(esr.workPlace);
@ -867,13 +863,13 @@ _cleanup:
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* customDictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
const void* samplesBuffer, const size_t* samplesSizes,
unsigned nbSamples, ZDICT_params_t params)
{
size_t hSize;
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
/* check conditions */
@ -914,11 +910,12 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
}
size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
U32 const notificationLevel = params.notificationLevel;
size_t hSize = 8;
@ -947,7 +944,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
return MIN(dictBufferCapacity, hSize+dictContentSize);
}
/* Hidden declaration for dbio.c */
size_t ZDICT_trainFromBuffer_unsafe_legacy(
void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t params);
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
* Warning : `samplesBuffer` must be followed by noisy guard band.
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
@ -982,31 +983,33 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
/* display best matches */
if (params.zParams.notificationLevel>= 3) {
U32 const nb = MIN(25, dictList[0].pos);
U32 const dictContentSize = ZDICT_dictSize(dictList);
U32 u;
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
unsigned const nb = MIN(25, dictList[0].pos);
unsigned const dictContentSize = ZDICT_dictSize(dictList);
unsigned u;
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
for (u=1; u<nb; u++) {
U32 const pos = dictList[u].pos;
U32 const length = dictList[u].length;
unsigned const pos = dictList[u].pos;
unsigned const length = dictList[u].length;
U32 const printedLength = MIN(40, length);
if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
free(dictList);
return ERROR(GENERIC); /* should never happen */
}
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
u, length, pos, dictList[u].savings);
u, length, pos, (unsigned)dictList[u].savings);
ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
DISPLAYLEVEL(3, "| \n");
} }
/* create dictionary */
{ U32 dictContentSize = ZDICT_dictSize(dictList);
{ unsigned dictContentSize = ZDICT_dictSize(dictList);
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
if (dictContentSize < targetDictSize/4) {
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
if (samplesBuffSize < 10 * targetDictSize)
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
if (minRep > MINRATIO) {
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
@ -1014,9 +1017,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
}
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
U32 proposedSelectivity = selectivity-1;
unsigned proposedSelectivity = selectivity-1;
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
}
@ -1082,17 +1085,17 @@ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
{
ZDICT_cover_params_t params;
ZDICT_fastCover_params_t params;
DEBUGLOG(3, "ZDICT_trainFromBuffer");
memset(&params, 0, sizeof(params));
params.d = 8;
params.steps = 4;
/* Default to level 6 since no compression level information is available */
params.zParams.compressionLevel = 6;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
params.zParams.notificationLevel = ZSTD_DEBUG;
params.zParams.compressionLevel = 3;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
params.zParams.notificationLevel = DEBUGLEVEL;
#endif
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
samplesBuffer, samplesSizes, nbSamples,
&params);
}

View file

@ -39,20 +39,27 @@ extern "C" {
/*! ZDICT_trainFromBuffer():
* Train a dictionary from an array of samples.
* Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
* f=20, and accel=1.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
* Note: Dictionary training will fail if there are not enough samples to construct a
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
* please open an issue with details, and we can look into it.
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples);
/*====== Helper functions ======*/
@ -84,11 +91,27 @@ typedef struct {
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_cover_params_t;
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;
/*! ZDICT_trainFromBuffer_cover():
* Train a dictionary from an array of samples using the COVER algorithm.
@ -97,6 +120,7 @@ typedef struct {
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
@ -115,13 +139,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
* dictionary constructed with those parameters is stored in `dictBuffer`.
*
* All of the parameters d, k, steps are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
@ -129,6 +154,50 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t* parameters);
/*! ZDICT_trainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* d and k are required.
* All other parameters are optional, will use default values if not provided
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t parameters);
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
* The same requirements as above hold for all the parameters except `parameters`.
* This function tries many parameter combinations (specifically, k and d combinations)
* and picks the best parameters. `*parameters` is filled with the best parameters found,
* dictionary constructed with those parameters is stored in `dictBuffer`.
* All of the parameters d, k, steps, f, and accel are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
* If f is zero, default value of 20 is used.
* If accel is zero, default value of 1 is used.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
size_t dictBufferCapacity, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters);
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics.
@ -140,7 +209,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
* or an error code, which can be tested by ZDICT_isError().
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
* Note 2: dictBuffer and dictContent can overlap
*/
@ -164,6 +233,7 @@ typedef struct {
* `parameters` is optional and can be provided with values set to 0 to mean "default".
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.

View file

@ -20,7 +20,7 @@ extern "C" {
***************************************/
#include "mem.h" /* MEM_STATIC */
#include "error_private.h" /* ERROR */
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
#include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
# undef ZSTD_LEGACY_SUPPORT
@ -178,43 +178,77 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
}
}
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
size_t compressedSize)
MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
{
U32 const version = ZSTD_isLegacy(src, compressedSize);
ZSTD_frameSizeInfo frameSizeInfo;
U32 const version = ZSTD_isLegacy(src, srcSize);
switch(version)
{
#if (ZSTD_LEGACY_SUPPORT <= 1)
case 1 :
return ZSTDv01_findFrameCompressedSize(src, compressedSize);
ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 2)
case 2 :
return ZSTDv02_findFrameCompressedSize(src, compressedSize);
ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 3)
case 3 :
return ZSTDv03_findFrameCompressedSize(src, compressedSize);
ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 4)
case 4 :
return ZSTDv04_findFrameCompressedSize(src, compressedSize);
ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 5)
case 5 :
return ZSTDv05_findFrameCompressedSize(src, compressedSize);
ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 6)
case 6 :
return ZSTDv06_findFrameCompressedSize(src, compressedSize);
ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
#if (ZSTD_LEGACY_SUPPORT <= 7)
case 7 :
return ZSTDv07_findFrameCompressedSize(src, compressedSize);
ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
&frameSizeInfo.compressedSize,
&frameSizeInfo.decompressedBound);
break;
#endif
default :
return ERROR(prefix_unknown);
frameSizeInfo.compressedSize = ERROR(prefix_unknown);
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
break;
}
if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
}
return frameSizeInfo;
}
MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
{
ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
return frameSizeInfo.compressedSize;
}
MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)

View file

@ -346,7 +346,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -668,11 +668,17 @@ static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
/* fallthrough */
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
/* fallthrough */
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
/* fallthrough */
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
/* fallthrough */
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
/* fallthrough */
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
/* fallthrough */
default:;
}
contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
@ -1067,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
/* Init */
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
{
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
/* Init */
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
{
HUF_DECODE_SYMBOL_0(0, bitTail);
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
{
HUF_DECODE_SYMBOL_0(0, bitTail);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
@ -1330,6 +1339,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7 /* to remove */
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4;
@ -1347,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)
static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@ -1373,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
}
}
static U32 ZSTD_readLE32(const void* memPtr)
static U32 ZSTD_readLE24(const void* memPtr)
{
if (ZSTD_isLittleEndian())
return ZSTD_read32(memPtr);
else
{
const BYTE* p = (const BYTE*)memPtr;
return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
}
return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
static U32 ZSTD_readBE32(const void* memPtr)
@ -1458,7 +1460,7 @@ unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
* Decompression code
**************************************************************/
size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{
const BYTE* const in = (const BYTE* const)src;
BYTE headerFlags;
@ -1511,7 +1513,7 @@ static size_t ZSTD_decompressLiterals(void* ctx,
}
size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
void* dst, size_t maxDstSize,
const BYTE** litStart, size_t* litSize,
const void* src, size_t srcSize)
@ -1563,7 +1565,7 @@ size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
}
size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
const void* src, size_t srcSize)
{
@ -1696,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
{
if (dumps<=(de-3))
{
litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}
@ -1724,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
{
if (dumps<=(de-3))
{
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}
@ -1751,7 +1753,7 @@ static size_t ZSTD_execSequence(BYTE* op,
BYTE* const base, BYTE* const oend)
{
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
const BYTE* const ostart = op;
const size_t litLength = sequence.litLength;
BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@ -1993,36 +1995,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
U32 magicNumber;
blockProperties_t blockProperties;
/* Frame Header */
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
magicNumber = ZSTD_readBE32(src);
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
if (magicNumber != ZSTD_magicNumber) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
/* Loop on each block */
while (1)
{
size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTDv01_isError(blockSize)) return blockSize;
if (ZSTDv01_isError(blockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
return;
}
ip += ZSTD_blockHeaderSize;
remainingSize -= ZSTD_blockHeaderSize;
if (blockSize > remainingSize) return ERROR(srcSize_wrong);
if (blockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (blockSize == 0) break; /* bt_end */
ip += blockSize;
remainingSize -= blockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * BLOCKSIZE;
}
/*******************************

View file

@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize);
/**
ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv01_isError())
*/
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/**
ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error

View file

@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -348,7 +353,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -399,11 +404,17 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
/* fallthrough */
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
/* fallthrough */
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
/* fallthrough */
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
/* fallthrough */
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
/* fallthrough */
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
/* fallthrough */
default:;
}
contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
@ -2722,6 +2733,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7 /* to remove */
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4;
@ -2876,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
@ -3035,11 +3049,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -3065,11 +3079,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -3090,7 +3104,7 @@ static size_t ZSTD_execSequence(BYTE* op,
BYTE* const base, BYTE* const oend)
{
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
const BYTE* const ostart = op;
BYTE* const oLitEnd = op + sequence.litLength;
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@ -3306,37 +3320,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
U32 magicNumber;
blockProperties_t blockProperties;
/* Frame Header */
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
magicNumber = MEM_readLE32(src);
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
if (magicNumber != ZSTD_magicNumber) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
/* Loop on each block */
while (1)
{
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
if (ZSTD_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTD_blockHeaderSize;
remainingSize -= ZSTD_blockHeaderSize;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (cBlockSize == 0) break; /* bt_end */
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * BLOCKSIZE;
}
/*******************************
@ -3452,11 +3488,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
{
return ZSTD_findFrameCompressedSize(src, compressedSize);
}
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
{
return (ZSTDv02_Dctx*)ZSTD_createDCtx();

View file

@ -35,13 +35,18 @@ ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize);
/**
ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv02_isError())
*/
size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/**
ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error

View file

@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -351,7 +356,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -402,11 +407,17 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
switch(srcSize)
{
case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
/* fallthrough */
case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
/* fallthrough */
case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
/* fallthrough */
case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
/* fallthrough */
case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
/* fallthrough */
case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
/* fallthrough */
default:;
}
contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
@ -2363,6 +2374,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7 /* to remove */
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4;
@ -2517,6 +2530,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
@ -2676,11 +2690,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -2706,11 +2720,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -2731,7 +2745,7 @@ static size_t ZSTD_execSequence(BYTE* op,
BYTE* const base, BYTE* const oend)
{
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
const BYTE* const ostart = op;
BYTE* const oLitEnd = op + sequence.litLength;
BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@ -2947,36 +2961,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
}
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
MEM_STATIC void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
U32 magicNumber;
blockProperties_t blockProperties;
/* Frame Header */
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
magicNumber = MEM_readLE32(src);
if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
if (magicNumber != ZSTD_magicNumber) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
/* Loop on each block */
while (1)
{
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
if (ZSTD_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTD_blockHeaderSize;
remainingSize -= ZSTD_blockHeaderSize;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (cBlockSize == 0) break; /* bt_end */
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * BLOCKSIZE;
}
@ -3093,11 +3130,6 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
}
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
{
return ZSTD_findFrameCompressedSize(src, srcSize);
}
ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
{
return (ZSTDv03_Dctx*)ZSTD_createDCtx();

View file

@ -35,13 +35,18 @@ ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize);
/**
ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.3.x format
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv03_isError())
*/
size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/**
ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error

View file

@ -9,14 +9,19 @@
*/
/*- Dependencies -*/
/******************************************
* Includes
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
#include "zstd_v04.h"
#include "error_private.h"
/* ******************************************************************
mem.h
****************************************************************** */
* mem.h
*******************************************************************/
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
@ -24,12 +29,6 @@
extern "C" {
#endif
/******************************************
* Includes
******************************************/
#include <stddef.h> /* size_t, ptrdiff_t */
#include <string.h> /* memcpy */
/******************************************
* Compiler-specific
@ -75,38 +74,9 @@ extern "C" {
/*-*************************************
* Debug
***************************************/
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
# include <assert.h>
#else
# ifndef assert
# define assert(condition) ((void)0)
# endif
#endif
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
# include <stdio.h>
extern int g_debuglog_enable;
/* recommended values for ZSTD_DEBUG display levels :
* 1 : no display, enables assert() only
* 2 : reserved for currently active debug path
* 3 : events once per object lifetime (CCtx, CDict, etc.)
* 4 : events once per frame
* 5 : events once per block
* 6 : events once per sequence (*very* verbose) */
# define RAWLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
fprintf(stderr, __VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#include "debug.h"
#ifndef assert
# define assert(condition) ((void)0)
#endif
@ -219,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@ -266,29 +241,11 @@ MEM_STATIC size_t MEM_readLEST(const void* memPtr)
#ifndef ZSTD_STATIC_H
#define ZSTD_STATIC_H
/* The objects defined into this file shall be considered experimental.
* They are not considered stable, as their prototype may change in the future.
* You can use them for tests, provide feedback, or if you can endure risks of future changes.
*/
#if defined (__cplusplus)
extern "C" {
#endif
/* *************************************
* Types
***************************************/
#define ZSTD_WINDOWLOG_MAX 26
#define ZSTD_WINDOWLOG_MIN 18
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
#define ZSTD_CONTENTLOG_MIN 4
#define ZSTD_HASHLOG_MAX 28
#define ZSTD_HASHLOG_MIN 4
#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_SEARCHLENGTH_MAX 7
#define ZSTD_SEARCHLENGTH_MIN 4
/** from faster to stronger */
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
@ -360,9 +317,6 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstS
*/
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_STATIC_H */
@ -375,10 +329,6 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstS
#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* *************************************
* Common macros
***************************************/
@ -428,6 +378,8 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
#define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
#define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
@ -450,10 +402,6 @@ static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
}
#if defined (__cplusplus)
}
#endif
/* ******************************************************************
FSE : Finite State Entropy coder
@ -679,7 +627,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -1142,6 +1090,7 @@ static size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, un
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
/* Init, lay down lowprob symbols */
memset(tableDecode, 0, sizeof(FSE_DECODE_TYPE) * (maxSymbolValue+1) ); /* useless init, but keep static analyzer happy, and we don't need to performance optimize legacy decoders */
DTableH.tableLog = (U16)tableLog;
for (s=0; s<=maxSymbolValue; s++)
{
@ -2706,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
{
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
if (litSize > srcSize-3) return ERROR(corruption_detected);
memcpy(dctx->litBuffer, istart, litSize);
dctx->litPtr = dctx->litBuffer;
@ -2864,13 +2814,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else {
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de) {
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@ -2893,13 +2842,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de){
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;
@ -2918,7 +2866,7 @@ static size_t ZSTD_execSequence(BYTE* op,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
{
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
BYTE* const oLitEnd = op + sequence.litLength;
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
@ -2991,7 +2939,7 @@ static size_t ZSTD_execSequence(BYTE* op,
}
else
{
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8, but must be signed */
}
return sequenceLength;
}
@ -3087,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
{
/* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src;
size_t litCSize;
if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
/* Decode literals sub-block */
size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize;
srcSize -= litCSize;
@ -3177,34 +3128,57 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
return op-ostart;
}
static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
blockProperties_t blockProperties;
/* Frame Header */
if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
if (srcSize < ZSTD_frameHeaderSize_min) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;
/* Loop on each block */
while (1)
{
size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
if (ZSTD_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTD_blockHeaderSize;
remainingSize -= ZSTD_blockHeaderSize;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (cBlockSize == 0) break; /* bt_end */
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * BLOCKSIZE;
}
/* ******************************
@ -3636,11 +3610,6 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
#endif
}
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
{
return ZSTD_findFrameCompressedSize(src, srcSize);
}
size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }
size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
@ -3670,8 +3639,3 @@ size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDs
ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }
size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }
size_t ZSTDv04_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
{
return ZSTD_getFrameParams(params, src, srcSize);
}

View file

@ -35,13 +35,18 @@ ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
const void* src, size_t compressedSize);
/**
ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv04_isError())
*/
size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/**
ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error

View file

@ -491,6 +491,8 @@ static const size_t ZSTDv05_frameHeaderSize_min = 5;
#define WILDCOPY_OVERLENGTH 8
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
@ -754,7 +756,7 @@ MEM_STATIC unsigned BITv05_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -836,7 +838,7 @@ MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
bitD->bitsConsumed += nbBits;
}
MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits)
{
size_t value = BITv05_lookBits(bitD, nbBits);
BITv05_skipBits(bitD, nbBits);
@ -845,7 +847,7 @@ MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
/*!BITv05_readBitsFast :
* unsafe version; only works only if nbBits >= 1 */
MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits)
{
size_t value = BITv05_lookBitsFast(bitD, nbBits);
BITv05_skipBits(bitD, nbBits);
@ -1162,7 +1164,7 @@ MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
/* **************************************************************
* Complex types
****************************************************************/
typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
typedef unsigned DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
/* **************************************************************
@ -1224,6 +1226,7 @@ size_t FSEv05_buildDTable(FSEv05_DTable* dt, const short* normalizedCounter, uns
if (tableLog > FSEv05_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
/* Init, lay down lowprob symbols */
memset(tableDecode, 0, sizeof(FSEv05_FUNCTION_TYPE) * (maxSymbolValue+1) ); /* useless init, but keep static analyzer happy, and we don't need to performance optimize legacy decoders */
DTableH.tableLog = (U16)tableLog;
for (s=0; s<=maxSymbolValue; s++) {
if (normalizedCounter[s]==-1) {
@ -1995,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable(
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
@ -2190,7 +2194,7 @@ static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
}
}
size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize)
{
BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
@ -2204,7 +2208,7 @@ size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
void* dtPtr = DTable;
HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(unsigned)); /* if compilation fails here, assertion is false */
if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
//memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
@ -2331,7 +2335,7 @@ static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, B
size_t HUFv05_decompress1X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const U32* DTable)
const unsigned* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
@ -2374,7 +2378,7 @@ size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t
size_t HUFv05_decompress4X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const U32* DTable)
const unsigned* DTable)
{
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
@ -2658,6 +2662,7 @@ struct ZSTDv05_DCtx_s
BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
}; /* typedef'd to ZSTDv05_DCtx within "zstd_static.h" */
size_t ZSTDv05_sizeofDCtx (void); /* Hidden declaration */
size_t ZSTDv05_sizeofDCtx (void) { return sizeof(ZSTDv05_DCtx); }
size_t ZSTDv05_decompressBegin(ZSTDv05_DCtx* dctx)
@ -2822,7 +2827,7 @@ static size_t ZSTDv05_decodeFrameHeader_Part2(ZSTDv05_DCtx* zc, const void* src,
}
size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
static size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{
const BYTE* const in = (const BYTE* const)src;
BYTE headerFlags;
@ -2845,6 +2850,7 @@ size_t ZSTDv05_getcBlockSize(const void* src, size_t srcSize, blockProperties_t*
static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (dst==NULL) return ERROR(dstSize_tooSmall);
if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
return srcSize;
@ -2853,8 +2859,8 @@ static size_t ZSTDv05_copyRawBlock(void* dst, size_t maxDstSize, const void* src
/*! ZSTDv05_decodeLiteralsBlock() :
@return : nb of bytes read from src (< srcSize ) */
size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
static size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
{
const BYTE* const istart = (const BYTE*) src;
@ -2988,7 +2994,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
}
size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
const void* src, size_t srcSize, U32 flagStaticTable)
{
@ -2996,7 +3002,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
const BYTE* ip = istart;
const BYTE* const iend = istart + srcSize;
U32 LLtype, Offtype, MLtype;
U32 LLlog, Offlog, MLlog;
unsigned LLlog, Offlog, MLlog;
size_t dumpsLength;
/* check */
@ -3054,7 +3060,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
break;
case FSEv05_ENCODING_DYNAMIC :
default : /* impossible */
{ U32 max = MaxLL;
{ unsigned max = MaxLL;
headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
@ -3078,7 +3084,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
break;
case FSEv05_ENCODING_DYNAMIC :
default : /* impossible */
{ U32 max = MaxOff;
{ unsigned max = MaxOff;
headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
@ -3102,7 +3108,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
break;
case FSEv05_ENCODING_DYNAMIC :
default : /* impossible */
{ U32 max = MaxML;
{ unsigned max = MaxML;
headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
@ -3145,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSEv05_peakSymbol(&(seqState->stateLL));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = *dumps++;
if (add < 255) litLength += add;
else {
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */
if (litLength&1) litLength>>=1, dumps += 3;
else litLength = (U16)(litLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
litLength = MEM_readLE16(dumps);
dumps += 2;
if ((litLength & 1) && dumps < de) {
litLength += *dumps << 16;
dumps += 1;
}
litLength>>=1;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@ -3179,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
if (matchLength&1) matchLength>>=1, dumps += 3;
else matchLength = (U16)(matchLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
matchLength = MEM_readLE16(dumps);
dumps += 2;
if ((matchLength & 1) && dumps < de) {
matchLength += *dumps << 16;
dumps += 1;
}
matchLength >>= 1;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;
@ -3214,7 +3226,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
{
static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
BYTE* const oLitEnd = op + sequence.litLength;
const size_t sequenceLength = sequence.litLength + sequence.matchLength;
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
@ -3297,14 +3309,14 @@ static size_t ZSTDv05_decompressSequences(
BYTE* const ostart = (BYTE* const)dst;
BYTE* op = ostart;
BYTE* const oend = ostart + maxDstSize;
size_t errorCode, dumpsLength;
size_t errorCode, dumpsLength=0;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
int nbSeq;
const BYTE* dumps;
U32* DTableLL = dctx->LLTable;
U32* DTableML = dctx->MLTable;
U32* DTableOffb = dctx->OffTable;
int nbSeq=0;
const BYTE* dumps = NULL;
unsigned* DTableLL = dctx->LLTable;
unsigned* DTableML = dctx->MLTable;
unsigned* DTableOffb = dctx->OffTable;
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
@ -3410,10 +3422,10 @@ static size_t ZSTDv05_decompress_continueDCtx(ZSTDv05_DCtx* dctx,
BYTE* const oend = ostart + maxDstSize;
size_t remainingSize = srcSize;
blockProperties_t blockProperties;
memset(&blockProperties, 0, sizeof(blockProperties));
/* Frame Header */
{
size_t frameHeaderSize;
{ size_t frameHeaderSize;
if (srcSize < ZSTDv05_frameHeaderSize_min+ZSTDv05_blockHeaderSize) return ERROR(srcSize_wrong);
frameHeaderSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
if (ZSTDv05_isError(frameHeaderSize)) return frameHeaderSize;
@ -3505,34 +3517,57 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
#endif
}
size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
blockProperties_t blockProperties;
/* Frame Header */
if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
if (srcSize < ZSTDv05_frameHeaderSize_min) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
/* Loop on each block */
while (1)
{
size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
if (ZSTDv05_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTDv05_blockHeaderSize;
remainingSize -= ZSTDv05_blockHeaderSize;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (cBlockSize == 0) break; /* bt_end */
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * BLOCKSIZE;
}
/* ******************************
@ -3630,7 +3665,7 @@ static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t d
{
size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
short offcodeNCount[MaxOff+1];
U32 offcodeMaxValue=MaxOff, offcodeLog;
unsigned offcodeMaxValue=MaxOff, offcodeLog;
short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
short litlengthNCount[MaxLL+1];

View file

@ -33,13 +33,18 @@ extern "C" {
size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/**
ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv05_isError())
*/
size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
/**
ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/* *************************************
* Helper functions

View file

@ -506,6 +506,8 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
#define FSEv06_ENCODING_STATIC 2
#define FSEv06_ENCODING_DYNAMIC 3
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
@ -858,7 +860,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -1250,9 +1252,7 @@ const char* FSEv06_getErrorName(size_t code) { return ERR_getErrorName(code); }
/* **************************************************************
* HUF Error Management
****************************************************************/
unsigned HUFv06_isError(size_t code) { return ERR_isError(code); }
const char* HUFv06_getErrorName(size_t code) { return ERR_getErrorName(code); }
static unsigned HUFv06_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
@ -2823,7 +2823,8 @@ struct ZSTDv06_DCtx_s
BYTE headerBuffer[ZSTDv06_FRAMEHEADERSIZE_MAX];
}; /* typedef'd to ZSTDv06_DCtx within "zstd_static.h" */
size_t ZSTDv06_sizeofDCtx (void) { return sizeof(ZSTDv06_DCtx); } /* non published interface */
size_t ZSTDv06_sizeofDCtx (void); /* Hidden declaration */
size_t ZSTDv06_sizeofDCtx (void) { return sizeof(ZSTDv06_DCtx); }
size_t ZSTDv06_decompressBegin(ZSTDv06_DCtx* dctx)
{
@ -3022,7 +3023,7 @@ typedef struct
/*! ZSTDv06_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{
const BYTE* const in = (const BYTE* const)src;
U32 cSize;
@ -3041,6 +3042,7 @@ size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t*
static size_t ZSTDv06_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
if (dst==NULL) return ERROR(dstSize_tooSmall);
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
memcpy(dst, src, srcSize);
return srcSize;
@ -3049,7 +3051,7 @@ static size_t ZSTDv06_copyRawBlock(void* dst, size_t dstCapacity, const void* sr
/*! ZSTDv06_decodeLiteralsBlock() :
@return : nb of bytes read from src (< srcSize ) */
size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
static size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
{
const BYTE* const istart = (const BYTE*) src;
@ -3183,7 +3185,7 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
@return : nb bytes read from src,
or an error code if it fails, testable with ZSTDv06_isError()
*/
size_t ZSTDv06_buildSeqTable(FSEv06_DTable* DTable, U32 type, U32 max, U32 maxLog,
static size_t ZSTDv06_buildSeqTable(FSEv06_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
{
@ -3213,7 +3215,7 @@ size_t ZSTDv06_buildSeqTable(FSEv06_DTable* DTable, U32 type, U32 max, U32 maxLo
}
size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
const void* src, size_t srcSize)
{
@ -3240,14 +3242,12 @@ size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const Offtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);
@ -3358,7 +3358,7 @@ static void ZSTDv06_decodeSequence(seq_t* seq, seqState_t* seqState)
}
size_t ZSTDv06_execSequence(BYTE* op,
static size_t ZSTDv06_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
@ -3406,7 +3406,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
if (sequence.offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[sequence.offset];
op[0] = match[0];
op[1] = match[1];
@ -3654,36 +3654,62 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
#endif
}
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
blockProperties_t blockProperties = { bt_compressed, 0 };
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
if (ZSTDv06_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;
}
if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
}
/* Loop on each block */
while (1) {
size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
if (ZSTDv06_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTDv06_blockHeaderSize;
remainingSize -= ZSTDv06_blockHeaderSize;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
if (cBlockSize == 0) break; /* bt_end */
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * ZSTDv06_BLOCKSIZE_MAX;
}
/*_******************************
@ -4006,7 +4032,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
if (ZSTDv06_isError(hSize)) return hSize;
if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */
memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
zbd->lhSize += iend-ip; ip = iend; notDone = 0;
zbd->lhSize += iend-ip;
*dstCapacityPtr = 0;
return (hSize - zbd->lhSize) + ZSTDv06_blockHeaderSize; /* remaining header bytes + next block header */
}

View file

@ -43,12 +43,17 @@ ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/**
ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv06_isError())
ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/* *************************************
* Helper functions

View file

@ -530,7 +530,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
_BitScanReverse ( &r, val );
return (unsigned) r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return 31 - __builtin_clz (val);
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
@ -2628,7 +2628,7 @@ const char* ZBUFFv07_getErrorName(size_t errorCode) { return ERR_getErrorName(er
void* ZSTDv07_defaultAllocFunction(void* opaque, size_t size)
static void* ZSTDv07_defaultAllocFunction(void* opaque, size_t size)
{
void* address = malloc(size);
(void)opaque;
@ -2636,7 +2636,7 @@ void* ZSTDv07_defaultAllocFunction(void* opaque, size_t size)
return address;
}
void ZSTDv07_defaultFreeFunction(void* opaque, void* address)
static void ZSTDv07_defaultFreeFunction(void* opaque, void* address)
{
(void)opaque;
/* if (address) printf("free %p opaque=%p \n", address, opaque); */
@ -2740,6 +2740,8 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
#define FSEv07_ENCODING_STATIC 2
#define FSEv07_ENCODING_DYNAMIC 3
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
@ -3150,10 +3152,10 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src,
const BYTE* ip = (const BYTE*)src;
if (srcSize < ZSTDv07_frameHeaderSize_min) return ZSTDv07_frameHeaderSize_min;
memset(fparamsPtr, 0, sizeof(*fparamsPtr));
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTDv07_MAGIC_SKIPPABLE_START) {
if (srcSize < ZSTDv07_skippableHeaderSize) return ZSTDv07_skippableHeaderSize; /* magic number + skippable frame length */
memset(fparamsPtr, 0, sizeof(*fparamsPtr));
fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4);
fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
return 0;
@ -3175,11 +3177,13 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src,
U32 windowSize = 0;
U32 dictID = 0;
U64 frameContentSize = 0;
if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */
if ((fhdByte & 0x08) != 0) /* reserved bits, which must be zero */
return ERROR(frameParameter_unsupported);
if (!directMode) {
BYTE const wlByte = ip[pos++];
U32 const windowLog = (wlByte >> 3) + ZSTDv07_WINDOWLOG_ABSOLUTEMIN;
if (windowLog > ZSTDv07_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported);
if (windowLog > ZSTDv07_WINDOWLOG_MAX)
return ERROR(frameParameter_unsupported);
windowSize = (1U << windowLog);
windowSize += (windowSize >> 3) * (wlByte&7);
}
@ -3201,7 +3205,8 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src,
case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
}
if (!windowSize) windowSize = (U32)frameContentSize;
if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported);
if (windowSize > windowSizeMax)
return ERROR(frameParameter_unsupported);
fparamsPtr->frameContentSize = frameContentSize;
fparamsPtr->windowSize = windowSize;
fparamsPtr->dictID = dictID;
@ -3220,11 +3225,10 @@ size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src,
- frame header not completely provided (`srcSize` too small) */
unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize)
{
{ ZSTDv07_frameParams fparams;
size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize);
if (frResult!=0) return 0;
return fparams.frameContentSize;
}
ZSTDv07_frameParams fparams;
size_t const frResult = ZSTDv07_getFrameParams(&fparams, src, srcSize);
if (frResult!=0) return 0;
return fparams.frameContentSize;
}
@ -3248,7 +3252,7 @@ typedef struct
/*! ZSTDv07_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{
const BYTE* const in = (const BYTE* const)src;
U32 cSize;
@ -3275,7 +3279,7 @@ static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* sr
/*! ZSTDv07_decodeLiteralsBlock() :
@return : nb of bytes read from src (< srcSize ) */
size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
static size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
{
const BYTE* const istart = (const BYTE*) src;
@ -3409,7 +3413,7 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
@return : nb bytes read from src,
or an error code if it fails, testable with ZSTDv07_isError()
*/
size_t ZSTDv07_buildSeqTable(FSEv07_DTable* DTable, U32 type, U32 max, U32 maxLog,
static size_t ZSTDv07_buildSeqTable(FSEv07_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
{
@ -3439,7 +3443,7 @@ size_t ZSTDv07_buildSeqTable(FSEv07_DTable* DTable, U32 type, U32 max, U32 maxLo
}
size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
const void* src, size_t srcSize)
{
@ -3466,14 +3470,12 @@ size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const OFtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
@ -3629,7 +3631,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
if (sequence.offset < 8) {
/* close range match, overlap */
static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */
static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
int const sub2 = dec64table[sequence.offset];
op[0] = match[0];
op[1] = match[1];
@ -3771,7 +3773,7 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS
}
size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
{
if (length > dstCapacity) return ERROR(dstSize_tooSmall);
memset(dst, byte, length);
@ -3851,7 +3853,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
* It avoids reloading the dictionary each time.
* `preparedDCtx` must have been properly initialized using ZSTDv07_decompressBegin_usingDict().
* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */
size_t ZSTDv07_decompress_usingPreparedDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* refDCtx,
static size_t ZSTDv07_decompress_usingPreparedDCtx(ZSTDv07_DCtx* dctx, const ZSTDv07_DCtx* refDCtx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
@ -3893,19 +3895,40 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
#endif
}
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
/* ZSTD_errorFrameSizeInfoLegacy() :
assumes `cSize` and `dBound` are _not_ NULL */
static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
{
*cSize = ret;
*dBound = ZSTD_CONTENTSIZE_ERROR;
}
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
{
const BYTE* ip = (const BYTE*)src;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
/* check */
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
if (ZSTDv07_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;
}
if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
return;
}
if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
ip += frameHeaderSize; remainingSize -= frameHeaderSize;
}
@ -3913,20 +3936,28 @@ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
while (1) {
blockProperties_t blockProperties;
size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
if (ZSTDv07_isError(cBlockSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
return;
}
ip += ZSTDv07_blockHeaderSize;
remainingSize -= ZSTDv07_blockHeaderSize;
if (blockProperties.blockType == bt_end) break;
if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
if (cBlockSize > remainingSize) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
return;
}
ip += cBlockSize;
remainingSize -= cBlockSize;
nbBlocks++;
}
return ip - (const BYTE*)src;
*cSize = ip - (const BYTE*)src;
*dBound = nbBlocks * ZSTDv07_BLOCKSIZE_ABSOLUTEMAX;
}
/*_******************************
@ -4146,7 +4177,7 @@ struct ZSTDv07_DDict_s {
ZSTDv07_DCtx* refContext;
}; /* typedef'd tp ZSTDv07_CDict within zstd.h */
ZSTDv07_DDict* ZSTDv07_createDDict_advanced(const void* dict, size_t dictSize, ZSTDv07_customMem customMem)
static ZSTDv07_DDict* ZSTDv07_createDDict_advanced(const void* dict, size_t dictSize, ZSTDv07_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree)
customMem = defaultCustomMem;

View file

@ -50,12 +50,17 @@ ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/**
ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
return : the number of bytes that would be read to decompress this frame
or an errorCode if it fails (which can be tested using ZSTDv07_isError())
ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
cSize (output parameter) : the number of bytes that would be read to decompress this frame
or an error code if it fails (which can be tested using ZSTDv01_isError())
dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
or ZSTD_CONTENTSIZE_ERROR if an error occurs
note : assumes `cSize` and `dBound` are _not_ NULL.
*/
size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
size_t* cSize, unsigned long long* dBound);
/*====== Helper functions ======*/
ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */

File diff suppressed because it is too large Load diff