staging: Add Snappy compression support to zram
Zram currently uses LZO compression. Snappy uses less CPU time and is thus more useful; the sacrifice in compression ratio is small. Zram's LZO and Snappy support can be enabled independently at compile time, and each zram device can switch between compression methods when unused. When only a single compression method is enabled at compile time, no indirection penalty is incurred.

http://driverdev.linuxdriverproject.org/pipermail/devel/2011-April/015114.html
parent f3818c8864
commit 4b10fc19b9
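The runtime switch works through the new per-device sysfs attribute added by this patch (/sys/block/zram<id>/compressor), which accepts the decimal index of a compiled-in compressor and refuses changes once the device is initialized. A minimal, hypothetical userspace sketch (not part of the patch; it assumes both compressors are built in, so LZO is index 0 and Snappy index 1, and that zram0 has not been initialized yet):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Open the compressor attribute of the first zram device. */
	int fd = open("/sys/block/zram0/compressor", O_WRONLY);
	if (fd < 0) {
		perror("open compressor attribute");
		return 1;
	}
	/* compressor_store() parses a decimal index into zram_compressors[]. */
	if (write(fd, "1", 1) != 1)
		perror("select Snappy");
	close(fd);
	return 0;
}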
@ -1697,6 +1697,10 @@ CONFIG_ZRAM_NUM_DEVICES=1
CONFIG_ZRAM_DEFAULT_PERCENTAGE=18
# CONFIG_ZRAM_DEBUG is not set
CONFIG_ZRAM_DEFAULT_DISKSIZE=100000000
# CONFIG_ZRAM_LZO is not set
CONFIG_ZRAM_SNAPPY=y
CONFIG_SNAPPY_COMPRESS=y
CONFIG_SNAPPY_DECOMPRESS=y

#
# File systems
2
drivers/staging/Kconfig
Normal file → Executable file
@ -125,5 +125,7 @@ source "drivers/staging/iio/Kconfig"

source "drivers/staging/zram/Kconfig"

source "drivers/staging/snappy/Kconfig"

endif # !STAGING_EXCLUDE_BUILD
endif # STAGING
3
drivers/staging/Makefile
Normal file → Executable file
@ -45,4 +45,5 @@ obj-$(CONFIG_DX_SEP) += sep/
obj-$(CONFIG_IIO) += iio/
obj-$(CONFIG_ZRAM) += zram/
obj-$(CONFIG_XVMALLOC) += zram/

obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/
obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/
5
drivers/staging/snappy/Kconfig
Executable file
@ -0,0 +1,5 @@
config SNAPPY_COMPRESS
tristate "Google Snappy Compression"

config SNAPPY_DECOMPRESS
tristate "Google Snappy Decompression"
5
drivers/staging/snappy/Makefile
Executable file
@ -0,0 +1,5 @@
snappy_compress-objs := csnappy_compress.o
snappy_decompress-objs := csnappy_decompress.o

obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o
obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o
125
drivers/staging/snappy/csnappy.h
Executable file
@ -0,0 +1,125 @@
|
||||
#ifndef __CSNAPPY_H__
|
||||
#define __CSNAPPY_H__
|
||||
/*
|
||||
File modified for the Linux Kernel by
|
||||
Zeev Tarantov <zeev.tarantov at gmail.com>
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CSNAPPY_VERSION 4
|
||||
|
||||
#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15
|
||||
#define CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO)
|
||||
|
||||
/*
|
||||
* Returns the maximal size of the compressed representation of
|
||||
* input data that is "source_len" bytes in length;
|
||||
*/
|
||||
uint32_t
|
||||
csnappy_max_compressed_length(uint32_t source_len) __attribute__((const));
|
||||
|
||||
/*
|
||||
* Flat array compression that does not emit the "uncompressed length"
|
||||
* prefix. Compresses "input" array to the "output" array.
|
||||
*
|
||||
* REQUIRES: "input" is at most 32KiB long.
|
||||
* REQUIRES: "output" points to an array of memory that is at least
|
||||
* "csnappy_max_compressed_length(input_length)" in size.
|
||||
* REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes.
|
||||
* REQUIRES: 9 <= workmem_bytes_power_of_two <= 15.
|
||||
*
|
||||
* Returns an "end" pointer into "output" buffer.
|
||||
* "end - output" is the compressed size of "input".
|
||||
*/
|
||||
char*
|
||||
csnappy_compress_fragment(
|
||||
const char *input,
|
||||
const uint32_t input_length,
|
||||
char *output,
|
||||
void *working_memory,
|
||||
const int workmem_bytes_power_of_two);
|
||||
|
||||
/*
|
||||
* REQUIRES: "compressed" must point to an area of memory that is at
|
||||
* least "csnappy_max_compressed_length(input_length)" bytes in length.
|
||||
* REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes.
|
||||
* REQUIRES: 9 <= workmem_bytes_power_of_two <= 15.
|
||||
*
|
||||
* Takes the data stored in "input[0..input_length]" and stores
|
||||
* it in the array pointed to by "compressed".
|
||||
*
|
||||
* "*out_compressed_length" is set to the length of the compressed output.
|
||||
*/
|
||||
void
|
||||
csnappy_compress(
|
||||
const char *input,
|
||||
uint32_t input_length,
|
||||
char *compressed,
|
||||
uint32_t *out_compressed_length,
|
||||
void *working_memory,
|
||||
const int workmem_bytes_power_of_two);
|
||||
|
||||
/*
|
||||
* Reads header of compressed data to get stored length of uncompressed data.
|
||||
* REQUIRES: start points to compressed data.
|
||||
* REQUIRES: n is length of available compressed data.
|
||||
*
|
||||
* Returns SNAPPY_E_HEADER_BAD on error.
|
||||
* Returns number of bytes read from input on success.
|
||||
* Stores decoded length into *result.
|
||||
*/
|
||||
int
|
||||
csnappy_get_uncompressed_length(
|
||||
const char *start,
|
||||
uint32_t n,
|
||||
uint32_t *result);
|
||||
|
||||
/*
|
||||
* Safely decompresses all data from array "src" of length "src_len" containing
|
||||
* entire compressed stream (with header) into array "dst" of size "dst_len".
|
||||
* REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...).
|
||||
*
|
||||
* Iff successful, returns CSNAPPY_E_OK.
|
||||
* If recorded length in header is greater than dst_len, returns
|
||||
* CSNAPPY_E_OUTPUT_INSUF.
|
||||
* If compressed data is malformed, does not write more than dst_len into dst.
|
||||
*/
|
||||
int
|
||||
csnappy_decompress(
|
||||
const char *src,
|
||||
uint32_t src_len,
|
||||
char *dst,
|
||||
uint32_t dst_len);
|
||||
|
||||
/*
|
||||
* Safely decompresses stream src_len bytes long read from src to dst.
|
||||
* Amount of available space at dst must be provided in *dst_len by caller.
|
||||
* If compressed stream needs more space, it will not overflow and return
|
||||
* CSNAPPY_E_OUTPUT_OVERRUN.
|
||||
* On success, sets *dst_len to actual number of bytes decompressed.
|
||||
* Iff successful, returns CSNAPPY_E_OK.
|
||||
*/
|
||||
int
|
||||
csnappy_decompress_noheader(
|
||||
const char *src,
|
||||
uint32_t src_len,
|
||||
char *dst,
|
||||
uint32_t *dst_len);
|
||||
|
||||
/*
|
||||
* Return values (< 0 = Error)
|
||||
*/
|
||||
#define CSNAPPY_E_OK 0
|
||||
#define CSNAPPY_E_HEADER_BAD (-1)
|
||||
#define CSNAPPY_E_OUTPUT_INSUF (-2)
|
||||
#define CSNAPPY_E_OUTPUT_OVERRUN (-3)
|
||||
#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4)
|
||||
#define CSNAPPY_E_DATA_MALFORMED (-5)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
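
To make the contract above concrete, here is a minimal, hypothetical in-kernel usage sketch (not part of the patch): it compresses a small buffer with the full-size hash table and then decompresses the resulting stream, header included. Buffer sizing follows the REQUIRES notes above.

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>
#include "csnappy.h"

static int csnappy_roundtrip_example(void)
{
	char src[64] = "an example payload that should compress";
	char dst[64];
	uint32_t comp_len = 0;
	void *workmem;
	char *comp;
	int err = -ENOMEM;

	/* Output buffer sized for the worst case, workmem at the maximum size. */
	comp = kmalloc(csnappy_max_compressed_length(sizeof(src)), GFP_KERNEL);
	workmem = kmalloc(CSNAPPY_WORKMEM_BYTES, GFP_KERNEL);
	if (!comp || !workmem)
		goto out;

	/* Compress using the maximum 2^15-byte working memory. */
	csnappy_compress(src, sizeof(src), comp, &comp_len,
			 workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);

	/* Decompress the whole stream; returns CSNAPPY_E_OK (0) on success. */
	err = csnappy_decompress(comp, comp_len, dst, sizeof(dst));
out:
	kfree(workmem);
	kfree(comp);
	return err;
}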
|
497
drivers/staging/snappy/csnappy_compress.c
Executable file
@ -0,0 +1,497 @@
|
||||
/*
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File modified for the Linux Kernel by
|
||||
Zeev Tarantov <zeev.tarantov at gmail.com>
|
||||
*/
|
||||
|
||||
#include "csnappy_internal.h"
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#endif
|
||||
#include "csnappy.h"
|
||||
|
||||
|
||||
static inline char*
|
||||
encode_varint32(char *sptr, uint32_t v)
|
||||
{
|
||||
uint8_t* ptr = (uint8_t *)sptr;
|
||||
static const int B = 128;
|
||||
if (v < (1<<7)) {
|
||||
*(ptr++) = v;
|
||||
} else if (v < (1<<14)) {
|
||||
*(ptr++) = v | B;
|
||||
*(ptr++) = v>>7;
|
||||
} else if (v < (1<<21)) {
|
||||
*(ptr++) = v | B;
|
||||
*(ptr++) = (v>>7) | B;
|
||||
*(ptr++) = v>>14;
|
||||
} else if (v < (1<<28)) {
|
||||
*(ptr++) = v | B;
|
||||
*(ptr++) = (v>>7) | B;
|
||||
*(ptr++) = (v>>14) | B;
|
||||
*(ptr++) = v>>21;
|
||||
} else {
|
||||
*(ptr++) = v | B;
|
||||
*(ptr++) = (v>>7) | B;
|
||||
*(ptr++) = (v>>14) | B;
|
||||
*(ptr++) = (v>>21) | B;
|
||||
*(ptr++) = v>>28;
|
||||
}
|
||||
return (char *)ptr;
|
||||
}
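/*
 * Worked example (illustration, not in the original file): encoding
 * v = 300 emits the bytes 0xac 0x02 -- the low seven bits come first,
 * with the high bit set on every byte except the last.
 */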
|
||||
|
||||
|
||||
/*
|
||||
* Any hash function will produce a valid compressed bitstream, but a good
|
||||
* hash function reduces the number of collisions and thus yields better
|
||||
* compression for compressible input, and more speed for incompressible
|
||||
* input. Of course, it doesn't hurt if the hash function is reasonably fast
|
||||
* either, as it gets called a lot.
|
||||
*/
|
||||
static inline uint32_t HashBytes(uint32_t bytes, int shift)
|
||||
{
|
||||
uint32_t kMul = 0x1e35a7bd;
|
||||
return (bytes * kMul) >> shift;
|
||||
}
|
||||
static inline uint32_t Hash(const char *p, int shift)
|
||||
{
|
||||
return HashBytes(UNALIGNED_LOAD32(p), shift);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* *** DO NOT CHANGE THE VALUE OF kBlockSize ***
|
||||
|
||||
* New Compression code chops up the input into blocks of at most
|
||||
* the following size. This ensures that back-references in the
|
||||
* output never cross kBlockSize block boundaries. This can be
|
||||
* helpful in implementing blocked decompression. However the
|
||||
* decompression code should not rely on this guarantee since older
|
||||
* compression code may not obey it.
|
||||
*/
|
||||
#define kBlockLog 15
|
||||
#define kBlockSize (1 << kBlockLog)
|
||||
|
||||
|
||||
/*
|
||||
* Return the largest n such that
|
||||
*
|
||||
* s1[0,n-1] == s2[0,n-1]
|
||||
* and n <= (s2_limit - s2).
|
||||
*
|
||||
* Does not read *s2_limit or beyond.
|
||||
* Does not read *(s1 + (s2_limit - s2)) or beyond.
|
||||
* Requires that s2_limit >= s2.
|
||||
*
|
||||
* Separate implementation for x86_64, for speed. Uses the fact that
|
||||
* x86_64 is little endian.
|
||||
*/
|
||||
#if defined(__x86_64__)
|
||||
static inline int
|
||||
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
|
||||
{
|
||||
uint64_t x;
|
||||
int matched, matching_bits;
|
||||
DCHECK_GE(s2_limit, s2);
|
||||
matched = 0;
|
||||
/*
|
||||
* Find out how long the match is. We loop over the data 64 bits at a
|
||||
* time until we find a 64-bit block that doesn't match; then we find
|
||||
* the first non-matching bit and use that to calculate the total
|
||||
* length of the match.
|
||||
*/
|
||||
while (likely(s2 <= s2_limit - 8)) {
|
||||
if (unlikely(UNALIGNED_LOAD64(s1 + matched) ==
|
||||
UNALIGNED_LOAD64(s2))) {
|
||||
s2 += 8;
|
||||
matched += 8;
|
||||
} else {
|
||||
/*
|
||||
* On current (mid-2008) Opteron models there is a 3%
|
||||
* more efficient code sequence to find the first
|
||||
* non-matching byte. However, what follows is ~10%
|
||||
* better on Intel Core 2 and newer, and we expect AMD's
|
||||
* bsf instruction to improve.
|
||||
*/
|
||||
x = UNALIGNED_LOAD64(s1 + matched) ^
|
||||
UNALIGNED_LOAD64(s2);
|
||||
matching_bits = FindLSBSetNonZero64(x);
|
||||
matched += matching_bits >> 3;
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
while (likely(s2 < s2_limit)) {
|
||||
if (likely(s1[matched] == *s2)) {
|
||||
++s2;
|
||||
++matched;
|
||||
} else {
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
#else /* !defined(__x86_64__) */
|
||||
static inline int
|
||||
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
|
||||
{
|
||||
/* Implementation based on the x86-64 version, above. */
|
||||
int matched = 0;
|
||||
DCHECK_GE(s2_limit, s2);
|
||||
|
||||
while (s2 <= s2_limit - 4 &&
|
||||
UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
|
||||
s2 += 4;
|
||||
matched += 4;
|
||||
}
|
||||
#if defined(__LITTLE_ENDIAN)
|
||||
if (s2 <= s2_limit - 4) {
|
||||
uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^
|
||||
UNALIGNED_LOAD32(s2);
|
||||
int matching_bits = FindLSBSetNonZero(x);
|
||||
matched += matching_bits >> 3;
|
||||
} else {
|
||||
while ((s2 < s2_limit) && (s1[matched] == *s2)) {
|
||||
++s2;
|
||||
++matched;
|
||||
}
|
||||
}
|
||||
#else
|
||||
while ((s2 < s2_limit) && (s1[matched] == *s2)) {
|
||||
++s2;
|
||||
++matched;
|
||||
}
|
||||
#endif
|
||||
return matched;
|
||||
}
|
||||
#endif /* !defined(__x86_64__) */
|
||||
|
||||
|
||||
static inline char*
|
||||
EmitLiteral(char *op, const char *literal, int len, int allow_fast_path)
|
||||
{
|
||||
int n = len - 1; /* Zero-length literals are disallowed */
|
||||
if (n < 60) {
|
||||
/* Fits in tag byte */
|
||||
*op++ = LITERAL | (n << 2);
|
||||
/*
|
||||
The vast majority of copies are below 16 bytes, for which a
|
||||
call to memcpy is overkill. This fast path can sometimes
|
||||
copy up to 15 bytes too much, but that is okay in the
|
||||
main loop, since we have a bit to go on for both sides:
|
||||
- The input will always have kInputMarginBytes = 15 extra
|
||||
available bytes, as long as we're in the main loop, and
|
||||
if not, allow_fast_path = false.
|
||||
- The output will always have 32 spare bytes (see
|
||||
snappy_max_compressed_length).
|
||||
*/
|
||||
if (allow_fast_path && len <= 16) {
|
||||
UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
|
||||
UNALIGNED_STORE64(op + 8,
|
||||
UNALIGNED_LOAD64(literal + 8));
|
||||
return op + len;
|
||||
}
|
||||
} else {
|
||||
/* Encode in upcoming bytes */
|
||||
char *base = op;
|
||||
int count = 0;
|
||||
op++;
|
||||
while (n > 0) {
|
||||
*op++ = n & 0xff;
|
||||
n >>= 8;
|
||||
count++;
|
||||
}
|
||||
DCHECK_GE(count, 1);
|
||||
DCHECK_LE(count, 4);
|
||||
*base = LITERAL | ((59+count) << 2);
|
||||
}
|
||||
memcpy(op, literal, len);
|
||||
return op + len;
|
||||
}
|
||||
|
||||
static inline char*
|
||||
EmitCopyLessThan64(char *op, int offset, int len)
|
||||
{
|
||||
DCHECK_LE(len, 64);
|
||||
DCHECK_GE(len, 4);
|
||||
DCHECK_LT(offset, 65536);
|
||||
|
||||
if ((len < 12) && (offset < 2048)) {
|
||||
int len_minus_4 = len - 4;
|
||||
DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
|
||||
*op++ = COPY_1_BYTE_OFFSET |
|
||||
((len_minus_4) << 2) |
|
||||
((offset >> 8) << 5);
|
||||
*op++ = offset & 0xff;
|
||||
} else {
|
||||
*op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
|
||||
put_unaligned_le16(offset, op);
|
||||
op += 2;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
static inline char*
|
||||
EmitCopy(char *op, int offset, int len)
|
||||
{
|
||||
/* Emit 64 byte copies but make sure to keep at least four bytes
|
||||
* reserved */
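/*
 * Illustration (not in the original file): for len == 67 a single
 * 64-byte copy would leave a 3-byte remainder, which
 * EmitCopyLessThan64 cannot encode (it requires len >= 4), hence the
 * len >= 68 threshold here and the 60-byte fallback below.
 */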
|
||||
while (len >= 68) {
|
||||
op = EmitCopyLessThan64(op, offset, 64);
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/* Emit an extra 60 byte copy if we have too much data to fit in one
|
||||
* copy */
|
||||
if (len > 64) {
|
||||
op = EmitCopyLessThan64(op, offset, 60);
|
||||
len -= 60;
|
||||
}
|
||||
|
||||
/* Emit remainder */
|
||||
op = EmitCopyLessThan64(op, offset, len);
|
||||
return op;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
|
||||
* equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
|
||||
* empirically found that overlapping loads such as
|
||||
* UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
|
||||
* are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32_t.
|
||||
*/
|
||||
static inline uint32_t
|
||||
GetUint32AtOffset(uint64_t v, int offset)
|
||||
{
|
||||
DCHECK(0 <= offset && offset <= 4);
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
return v >> (8 * offset);
|
||||
#else
|
||||
return v >> (32 - 8 * offset);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define kInputMarginBytes 15
|
||||
char*
|
||||
csnappy_compress_fragment(
|
||||
const char *input,
|
||||
const uint32_t input_size,
|
||||
char *op,
|
||||
void *working_memory,
|
||||
const int workmem_bytes_power_of_two)
|
||||
{
|
||||
const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip,
|
||||
*candidate, *base;
|
||||
uint16_t *table = (uint16_t *)working_memory;
|
||||
uint64_t input_bytes;
|
||||
uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes;
|
||||
int shift, matched;
|
||||
|
||||
DCHECK_GE(workmem_bytes_power_of_two, 9);
|
||||
DCHECK_LE(workmem_bytes_power_of_two, 15);
|
||||
/* Table of 2^X bytes, need (X-1) bits to address table of uint16_t.
|
||||
* How many bits of 32bit hash function result are discarded? */
|
||||
shift = 33 - workmem_bytes_power_of_two;
|
||||
/* "ip" is the input pointer, and "op" is the output pointer. */
|
||||
ip = input;
|
||||
DCHECK_LE(input_size, kBlockSize);
|
||||
ip_end = input + input_size;
|
||||
base_ip = ip;
|
||||
/* Bytes in [next_emit, ip) will be emitted as literal bytes. Or
|
||||
[next_emit, ip_end) after the main loop. */
|
||||
next_emit = ip;
|
||||
|
||||
if (unlikely(input_size < kInputMarginBytes))
|
||||
goto emit_remainder;
|
||||
|
||||
memset(working_memory, 0, 1 << workmem_bytes_power_of_two);
|
||||
|
||||
ip_limit = input + input_size - kInputMarginBytes;
|
||||
next_hash = Hash(++ip, shift);
|
||||
|
||||
main_loop:
|
||||
DCHECK_LT(next_emit, ip);
|
||||
/*
|
||||
* The body of this loop calls EmitLiteral once and then EmitCopy one or
|
||||
* more times. (The exception is that when we're close to exhausting
|
||||
* the input we goto emit_remainder.)
|
||||
*
|
||||
* In the first iteration of this loop we're just starting, so
|
||||
* there's nothing to copy, so calling EmitLiteral once is
|
||||
* necessary. And we only start a new iteration when the
|
||||
* current iteration has determined that a call to EmitLiteral will
|
||||
* precede the next call to EmitCopy (if any).
|
||||
*
|
||||
* Step 1: Scan forward in the input looking for a 4-byte-long match.
|
||||
* If we get close to exhausting the input then goto emit_remainder.
|
||||
*
|
||||
* Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
* found, start looking only at every other byte. If 32 more bytes are
|
||||
* scanned, look at every third byte, etc.. When a match is found,
|
||||
* immediately go back to looking at every byte. This is a small loss
|
||||
* (~5% performance, ~0.1% density) for compressible data due to more
|
||||
* bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
* win since the compressor quickly "realizes" the data is incompressible
|
||||
* and doesn't bother looking for matches everywhere.
|
||||
*
|
||||
* The "skip" variable keeps track of how many bytes there are since the
|
||||
* last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
* number of bytes to move ahead for each iteration.
|
||||
*/
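/*
 * Illustration (not in the original file): skip starts at 32 and the
 * step below is skip++ >> 5, so the scan advances 1 byte at a time for
 * the first 32 probes, 2 bytes for the next 32, and so on.
 */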
|
||||
skip = 32;
|
||||
|
||||
next_ip = ip;
|
||||
do {
|
||||
ip = next_ip;
|
||||
hash = next_hash;
|
||||
DCHECK_EQ(hash, Hash(ip, shift));
|
||||
next_ip = ip + (skip++ >> 5);
|
||||
if (unlikely(next_ip > ip_limit))
|
||||
goto emit_remainder;
|
||||
next_hash = Hash(next_ip, shift);
|
||||
candidate = base_ip + table[hash];
|
||||
DCHECK_GE(candidate, base_ip);
|
||||
DCHECK_LT(candidate, ip);
|
||||
|
||||
table[hash] = ip - base_ip;
|
||||
} while (likely(UNALIGNED_LOAD32(ip) !=
|
||||
UNALIGNED_LOAD32(candidate)));
|
||||
|
||||
/*
|
||||
* Step 2: A 4-byte match has been found. We'll later see if more
|
||||
* than 4 bytes match. But, prior to the match, input
|
||||
* bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
|
||||
*/
|
||||
DCHECK_LE(next_emit + 16, ip_end);
|
||||
op = EmitLiteral(op, next_emit, ip - next_emit, 1);
|
||||
|
||||
/*
|
||||
* Step 3: Call EmitCopy, and then see if another EmitCopy could
|
||||
* be our next move. Repeat until we find no match for the
|
||||
* input immediately after what was consumed by the last EmitCopy call.
|
||||
*
|
||||
* If we exit this loop normally then we need to call EmitLiteral next,
|
||||
* though we don't yet know how big the literal will be. We handle that
|
||||
* by proceeding to the next iteration of the main loop. We also can exit
|
||||
* this loop via goto if we get close to exhausting the input.
|
||||
*/
|
||||
input_bytes = 0;
|
||||
candidate_bytes = 0;
|
||||
|
||||
do {
|
||||
/* We have a 4-byte match at ip, and no need to emit any
|
||||
"literal bytes" prior to ip. */
|
||||
base = ip;
|
||||
matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
|
||||
ip += matched;
|
||||
DCHECK_EQ(0, memcmp(base, candidate, matched));
|
||||
op = EmitCopy(op, base - candidate, matched);
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update table[Hash(ip - 1, ...)]. */
|
||||
next_emit = ip;
|
||||
if (unlikely(ip >= ip_limit))
|
||||
goto emit_remainder;
|
||||
input_bytes = UNALIGNED_LOAD64(ip - 1);
|
||||
prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
|
||||
table[prev_hash] = ip - base_ip - 1;
|
||||
cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
candidate_bytes = UNALIGNED_LOAD32(candidate);
|
||||
table[cur_hash] = ip - base_ip;
|
||||
} while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
|
||||
|
||||
next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
|
||||
++ip;
|
||||
goto main_loop;
|
||||
|
||||
emit_remainder:
|
||||
/* Emit the remaining bytes as a literal */
|
||||
if (next_emit < ip_end)
|
||||
op = EmitLiteral(op, next_emit, ip_end - next_emit, 0);
|
||||
|
||||
return op;
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_compress_fragment);
|
||||
#endif
|
||||
|
||||
uint32_t __attribute__((const))
|
||||
csnappy_max_compressed_length(uint32_t source_len)
|
||||
{
|
||||
return 32 + source_len + source_len/6;
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_max_compressed_length);
|
||||
#endif
|
||||
|
||||
void
|
||||
csnappy_compress(
|
||||
const char *input,
|
||||
uint32_t input_length,
|
||||
char *compressed,
|
||||
uint32_t *compressed_length,
|
||||
void *working_memory,
|
||||
const int workmem_bytes_power_of_two)
|
||||
{
|
||||
int workmem_size;
|
||||
int num_to_read;
|
||||
uint32_t written = 0;
|
||||
char *p = encode_varint32(compressed, input_length);
|
||||
written += (p - compressed);
|
||||
compressed = p;
|
||||
while (input_length > 0) {
|
||||
num_to_read = min(input_length, (uint32_t)kBlockSize);
|
||||
workmem_size = workmem_bytes_power_of_two;
|
||||
if (num_to_read < kBlockSize) {
|
||||
for (workmem_size = 9;
|
||||
workmem_size < workmem_bytes_power_of_two;
|
||||
++workmem_size) {
|
||||
if ((1 << (workmem_size-1)) >= num_to_read)
|
||||
break;
|
||||
}
|
||||
}
|
||||
p = csnappy_compress_fragment(
|
||||
input, num_to_read, compressed,
|
||||
working_memory, workmem_size);
|
||||
written += (p - compressed);
|
||||
compressed = p;
|
||||
input_length -= num_to_read;
|
||||
input += num_to_read;
|
||||
}
|
||||
*compressed_length = written;
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_compress);
|
||||
|
||||
MODULE_LICENSE("BSD");
|
||||
MODULE_DESCRIPTION("Snappy Compressor");
|
||||
#endif
|
321
drivers/staging/snappy/csnappy_decompress.c
Executable file
@ -0,0 +1,321 @@
|
||||
/*
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File modified for the Linux Kernel by
|
||||
Zeev Tarantov <zeev.tarantov at gmail.com>
|
||||
*/
|
||||
|
||||
#include "csnappy_internal.h"
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#endif
|
||||
#include "csnappy.h"
|
||||
|
||||
|
||||
/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */
|
||||
static const uint32_t wordmask[] = {
|
||||
0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
|
||||
};
|
||||
|
||||
/*
|
||||
* Data stored per entry in lookup table:
|
||||
* Range Bits-used Description
|
||||
* ------------------------------------
|
||||
* 1..64 0..7 Literal/copy length encoded in opcode byte
|
||||
* 0..7 8..10 Copy offset encoded in opcode byte / 256
|
||||
* 0..4 11..13 Extra bytes after opcode
|
||||
*
|
||||
* We use eight bits for the length even though 7 would have sufficed
|
||||
* because of efficiency reasons:
|
||||
* (1) Extracting a byte is faster than a bit-field
|
||||
* (2) It properly aligns copy offset so we do not need a <<8
|
||||
*/
|
||||
static const uint16_t char_table[256] = {
|
||||
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
|
||||
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
|
||||
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
|
||||
0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
|
||||
0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
|
||||
0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
|
||||
0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
|
||||
0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
|
||||
0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
|
||||
0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
|
||||
0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
|
||||
0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
|
||||
0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
|
||||
0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
|
||||
0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
|
||||
0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
|
||||
0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
|
||||
0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
|
||||
0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
|
||||
0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
|
||||
0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
|
||||
0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
|
||||
0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
|
||||
0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
|
||||
0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
|
||||
0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
|
||||
0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
|
||||
0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
|
||||
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
|
||||
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
|
||||
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
|
||||
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
|
||||
};
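/*
 * Example (illustration, not in the original file): opcode 0x01 is a
 * COPY_1_BYTE_OFFSET tag; char_table[0x01] == 0x0804 decodes as
 * length 4 (bits 0..7), offset/256 == 0 (bits 8..10) and one extra
 * trailer byte (bits 11..13), matching the table description above.
 */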
|
||||
|
||||
/*
|
||||
* Copy "len" bytes from "src" to "op", one byte at a time. Used for
|
||||
* handling COPY operations where the input and output regions may
|
||||
* overlap. For example, suppose:
|
||||
* src == "ab"
|
||||
* op == src + 2
|
||||
* len == 20
|
||||
* After IncrementalCopy(src, op, len), the result will have
|
||||
* eleven copies of "ab"
|
||||
* ababababababababababab
|
||||
* Note that this does not match the semantics of either memcpy()
|
||||
* or memmove().
|
||||
*/
|
||||
static inline void IncrementalCopy(const char *src, char *op, int len)
|
||||
{
|
||||
DCHECK_GT(len, 0);
|
||||
do {
|
||||
*op++ = *src++;
|
||||
} while (--len > 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Equivalent to IncrementalCopy except that it can write up to ten extra
|
||||
* bytes after the end of the copy, and that it is faster.
|
||||
*
|
||||
* The main part of this loop is a simple copy of eight bytes at a time until
|
||||
* we've copied (at least) the requested amount of bytes. However, if op and
|
||||
* src are less than eight bytes apart (indicating a repeating pattern of
|
||||
* length < 8), we first need to expand the pattern in order to get the correct
|
||||
* results. For instance, if the buffer looks like this, with the eight-byte
|
||||
* <src> and <op> patterns marked as intervals:
|
||||
*
|
||||
* abxxxxxxxxxxxx
|
||||
* [------] src
|
||||
* [------] op
|
||||
*
|
||||
* a single eight-byte copy from <src> to <op> will repeat the pattern once,
|
||||
* after which we can move <op> two bytes without moving <src>:
|
||||
*
|
||||
* ababxxxxxxxxxx
|
||||
* [------] src
|
||||
* [------] op
|
||||
*
|
||||
* and repeat the exercise until the two no longer overlap.
|
||||
*
|
||||
* This allows us to do very well in the special case of one single byte
|
||||
* repeated many times, without taking a big hit for more general cases.
|
||||
*
|
||||
* The worst case of extra writing past the end of the match occurs when
|
||||
* op - src == 1 and len == 1; the last copy will read from byte positions
|
||||
* [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||
* position 1. Thus, ten excess bytes.
|
||||
*/
|
||||
static const int kMaxIncrementCopyOverflow = 10;
|
||||
static inline void IncrementalCopyFastPath(const char *src, char *op, int len)
|
||||
{
|
||||
while (op - src < 8) {
|
||||
UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
|
||||
len -= op - src;
|
||||
op += op - src;
|
||||
}
|
||||
while (len > 0) {
|
||||
UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
|
||||
src += 8;
|
||||
op += 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* A type that writes to a flat array. */
|
||||
struct SnappyArrayWriter {
|
||||
char *base;
|
||||
char *op;
|
||||
char *op_limit;
|
||||
};
|
||||
|
||||
static inline int
|
||||
SAW__Append(struct SnappyArrayWriter *this,
|
||||
const char *ip, uint32_t len, int allow_fast_path)
|
||||
{
|
||||
char *op = this->op;
|
||||
const int space_left = this->op_limit - op;
|
||||
/* Fast path, used for the majority (about 90%) of dynamic invocations. */
|
||||
if (allow_fast_path && len <= 16 && space_left >= 16) {
|
||||
UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
|
||||
UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
|
||||
} else {
|
||||
if (space_left < len)
|
||||
return CSNAPPY_E_OUTPUT_OVERRUN;
|
||||
memcpy(op, ip, len);
|
||||
}
|
||||
this->op = op + len;
|
||||
return CSNAPPY_E_OK;
|
||||
}
|
||||
|
||||
static inline int
|
||||
SAW__AppendFromSelf(struct SnappyArrayWriter *this,
|
||||
uint32_t offset, uint32_t len)
|
||||
{
|
||||
char *op = this->op;
|
||||
const int space_left = this->op_limit - op;
|
||||
/* -1u catches offset==0 */
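/*
 * Illustration (not in the original file): when offset == 0 the
 * unsigned "offset - 1u" wraps to 0xffffffff, so the check also
 * rejects copies that would reach back before the start of the
 * output buffer.
 */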
|
||||
if (op - this->base <= offset - 1u)
|
||||
return CSNAPPY_E_DATA_MALFORMED;
|
||||
/* Fast path, used for the majority (70-80%) of dynamic invocations. */
|
||||
if (len <= 16 && offset >= 8 && space_left >= 16) {
|
||||
UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset));
|
||||
UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8));
|
||||
} else if (space_left >= len + kMaxIncrementCopyOverflow) {
|
||||
IncrementalCopyFastPath(op - offset, op, len);
|
||||
} else {
|
||||
if (space_left < len)
|
||||
return CSNAPPY_E_OUTPUT_OVERRUN;
|
||||
IncrementalCopy(op - offset, op, len);
|
||||
}
|
||||
this->op = op + len;
|
||||
return CSNAPPY_E_OK;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
csnappy_get_uncompressed_length(
|
||||
const char *src,
|
||||
uint32_t src_len,
|
||||
uint32_t *result)
|
||||
{
|
||||
const char *src_base = src;
|
||||
uint32_t shift = 0;
|
||||
uint8_t c;
|
||||
/* Length is encoded in 1..5 bytes */
|
||||
*result = 0;
|
||||
for (;;) {
|
||||
if (shift >= 32)
|
||||
goto err_out;
|
||||
if (src_len == 0)
|
||||
goto err_out;
|
||||
c = *(const uint8_t *)src++;
|
||||
src_len -= 1;
|
||||
*result |= (uint32_t)(c & 0x7f) << shift;
|
||||
if (c < 128)
|
||||
break;
|
||||
shift += 7;
|
||||
}
|
||||
return src - src_base;
|
||||
err_out:
|
||||
return CSNAPPY_E_HEADER_BAD;
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_get_uncompressed_length);
|
||||
#endif
|
||||
|
||||
int
|
||||
csnappy_decompress_noheader(
|
||||
const char *src,
|
||||
uint32_t src_remaining,
|
||||
char *dst,
|
||||
uint32_t *dst_len)
|
||||
{
|
||||
struct SnappyArrayWriter writer;
|
||||
uint32_t length, trailer, opword, extra_bytes;
|
||||
int ret;
|
||||
uint8_t opcode;
|
||||
char scratch[5];
|
||||
writer.op = writer.base = dst;
|
||||
writer.op_limit = writer.op + *dst_len;
|
||||
while (src_remaining) {
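/*
 * Note (illustration, not in the original file): the opcode byte plus
 * up to four trailer bytes are fetched below with get_unaligned_le32(),
 * so when fewer than five input bytes remain the tail is first copied
 * into the 5-byte scratch buffer to keep that read in bounds.
 */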
|
||||
if (unlikely(src_remaining < 5)) {
|
||||
memcpy(scratch, src, src_remaining);
|
||||
src = scratch;
|
||||
}
|
||||
opcode = *(const uint8_t *)src++;
|
||||
opword = char_table[opcode];
|
||||
extra_bytes = opword >> 11;
|
||||
trailer = get_unaligned_le32(src) & wordmask[extra_bytes];
|
||||
src += extra_bytes;
|
||||
src_remaining -= 1 + extra_bytes;
|
||||
length = opword & 0xff;
|
||||
if (opcode & 0x3) {
|
||||
trailer += opword & 0x700;
|
||||
ret = SAW__AppendFromSelf(&writer, trailer, length);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
} else {
|
||||
length += trailer;
|
||||
if (unlikely(src_remaining < length))
|
||||
return CSNAPPY_E_DATA_MALFORMED;
|
||||
ret = src_remaining >= 16;
|
||||
ret = SAW__Append(&writer, src, length, ret);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
src += length;
|
||||
src_remaining -= length;
|
||||
}
|
||||
}
|
||||
*dst_len = writer.op - writer.base;
|
||||
return CSNAPPY_E_OK;
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_decompress_noheader);
|
||||
#endif
|
||||
|
||||
int
|
||||
csnappy_decompress(
|
||||
const char *src,
|
||||
uint32_t src_len,
|
||||
char *dst,
|
||||
uint32_t dst_len)
|
||||
{
|
||||
int n;
|
||||
uint32_t olen = 0;
|
||||
/* Read uncompressed length from the front of the compressed input */
|
||||
n = csnappy_get_uncompressed_length(src, src_len, &olen);
|
||||
if (unlikely(n < CSNAPPY_E_OK))
|
||||
return n;
|
||||
/* Protect against possible DoS attack */
|
||||
if (unlikely(olen > dst_len))
|
||||
return CSNAPPY_E_OUTPUT_INSUF;
|
||||
return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen);
|
||||
}
|
||||
#if defined(__KERNEL__) && !defined(STATIC)
|
||||
EXPORT_SYMBOL(csnappy_decompress);
|
||||
|
||||
MODULE_LICENSE("BSD");
|
||||
MODULE_DESCRIPTION("Snappy Decompressor");
|
||||
#endif
|
83
drivers/staging/snappy/csnappy_internal.h
Executable file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
Copyright 2011 Google Inc. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Various stubs for the open-source version of Snappy.
|
||||
|
||||
File modified for the Linux Kernel by
|
||||
Zeev Tarantov <zeev.tarantov at gmail.com>
|
||||
*/
|
||||
|
||||
#ifndef CSNAPPY_INTERNAL_H_
|
||||
#define CSNAPPY_INTERNAL_H_
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#include "csnappy_internal_userspace.h"
|
||||
#else
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DCHECK(cond) if (!(cond)) \
|
||||
printk(KERN_DEBUG "assert failed @ %s:%i\n", \
|
||||
__FILE__, __LINE__)
|
||||
#else
|
||||
#define DCHECK(cond)
|
||||
#endif
|
||||
|
||||
#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p))
|
||||
#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p))
|
||||
#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p))
|
||||
#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p))
|
||||
#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p))
|
||||
#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p))
|
||||
|
||||
#define FindLSBSetNonZero(n) __builtin_ctz(n)
|
||||
#define FindLSBSetNonZero64(n) __builtin_ctzll(n)
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#define DCHECK_EQ(a, b) DCHECK(((a) == (b)))
|
||||
#define DCHECK_NE(a, b) DCHECK(((a) != (b)))
|
||||
#define DCHECK_GT(a, b) DCHECK(((a) > (b)))
|
||||
#define DCHECK_GE(a, b) DCHECK(((a) >= (b)))
|
||||
#define DCHECK_LT(a, b) DCHECK(((a) < (b)))
|
||||
#define DCHECK_LE(a, b) DCHECK(((a) <= (b)))
|
||||
|
||||
enum {
|
||||
LITERAL = 0,
|
||||
COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */
|
||||
COPY_2_BYTE_OFFSET = 2,
|
||||
COPY_4_BYTE_OFFSET = 3
|
||||
};
|
||||
|
||||
#endif /* CSNAPPY_INTERNAL_H_ */
|
@ -6,8 +6,6 @@ config ZRAM
tristate "Compressed RAM block device support"
depends on BLOCK && SYSFS
select XVMALLOC
select LZO_COMPRESS
select LZO_DECOMPRESS
default n
help
Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
@ -28,3 +26,15 @@ config ZRAM_DEBUG
help
This option adds additional debugging code to the compressed
RAM block device driver.
config ZRAM_LZO
bool "LZO compression"
default y
depends on ZRAM
select LZO_COMPRESS
select LZO_DECOMPRESS

config ZRAM_SNAPPY
bool "Snappy compression"
depends on ZRAM
select SNAPPY_COMPRESS
select SNAPPY_DECOMPRESS
@ -29,12 +29,90 @@
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/lzo.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include "zram_drv.h"
|
||||
|
||||
#if defined(CONFIG_ZRAM_LZO)
|
||||
#include <linux/lzo.h>
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
static const struct zram_compressor lzo_compressor = {
|
||||
.name = "LZO",
|
||||
.workmem_bytes = LZO1X_MEM_COMPRESS,
|
||||
.compress = &lzo1x_1_compress,
|
||||
.decompress = &lzo1x_decompress_safe
|
||||
};
|
||||
#else /* !MULTIPLE_COMPRESSORS */
|
||||
#define WMSIZE LZO1X_MEM_COMPRESS
|
||||
#define COMPRESS(s, sl, d, dl, wm) \
|
||||
lzo1x_1_compress(s, sl, d, dl, wm)
|
||||
#define DECOMPRESS(s, sl, d, dl) \
|
||||
lzo1x_decompress_safe(s, sl, d, dl)
|
||||
#endif /* !MULTIPLE_COMPRESSORS */
|
||||
#endif /* defined(CONFIG_ZRAM_LZO) */
|
||||
|
||||
#if defined(CONFIG_ZRAM_SNAPPY)
|
||||
#include "../snappy/csnappy.h" /* if built in drivers/staging */
|
||||
#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? (15) : (PAGE_SHIFT+1))
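/*
 * Note (illustration, not in the original file): with the common 4 KiB
 * PAGE_SIZE this evaluates to 13 (an 8 KiB hash table), and it never
 * exceeds the 2^15 maximum that csnappy_compress_fragment() accepts.
 */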
|
||||
static int
|
||||
snappy_compress_(
|
||||
const unsigned char *src,
|
||||
size_t src_len,
|
||||
unsigned char *dst,
|
||||
size_t *dst_len,
|
||||
void *workmem)
|
||||
{
|
||||
const unsigned char *end = csnappy_compress_fragment(
|
||||
src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER);
|
||||
*dst_len = end - dst;
|
||||
return 0;
|
||||
}
|
||||
static int
|
||||
snappy_decompress_(
|
||||
const unsigned char *src,
|
||||
size_t src_len,
|
||||
unsigned char *dst,
|
||||
size_t *dst_len)
|
||||
{
|
||||
uint32_t dst_len_ = (uint32_t)*dst_len;
|
||||
int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_);
|
||||
*dst_len = (size_t)dst_len_;
|
||||
return ret;
|
||||
}
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
static const struct zram_compressor snappy_compressor = {
|
||||
.name = "SNAPPY",
|
||||
.workmem_bytes = (1 << WMSIZE_ORDER),
|
||||
.compress = &snappy_compress_,
|
||||
.decompress = &snappy_decompress_
|
||||
};
|
||||
#else /* !MULTIPLE_COMPRESSORS */
|
||||
#define WMSIZE (1 << WMSIZE_ORDER)
|
||||
#define COMPRESS(s, sl, d, dl, wm) \
|
||||
snappy_compress_(s, sl, d, dl, wm)
|
||||
#define DECOMPRESS(s, sl, d, dl) \
|
||||
snappy_decompress_(s, sl, d, dl)
|
||||
#endif /* !MULTIPLE_COMPRESSORS */
|
||||
#endif /* defined(CONFIG_ZRAM_SNAPPY) */
|
||||
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
const struct zram_compressor * const zram_compressors[] = {
|
||||
#if defined(CONFIG_ZRAM_LZO)
|
||||
&lzo_compressor,
|
||||
#endif
|
||||
#if defined(CONFIG_ZRAM_SNAPPY)
|
||||
&snappy_compressor,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
#define WMSIZE (zram->compressor->workmem_bytes)
|
||||
#define COMPRESS(s, sl, d, dl, wm) \
|
||||
(zram->compressor->compress(s, sl, d, dl, wm))
|
||||
#define DECOMPRESS(s, sl, d, dl) \
|
||||
(zram->compressor->decompress(s, sl, d, dl))
|
||||
#endif /* MULTIPLE_COMPRESSORS */
|
||||
|
||||
/* Globals */
|
||||
static int zram_major;
|
||||
struct zram *zram_devices;
|
||||
@ -257,7 +335,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
|
||||
cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
|
||||
zram->table[index].offset;
|
||||
|
||||
ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
|
||||
ret = DECOMPRESS(cmem + sizeof(*zheader),
|
||||
xv_get_object_size(cmem) - sizeof(*zheader),
|
||||
uncmem, &clen);
|
||||
|
||||
@ -271,7 +349,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
|
||||
kunmap_atomic(user_mem, KM_USER0);
|
||||
|
||||
/* Should NEVER happen. Return bio error if it does. */
|
||||
if (unlikely(ret != LZO_E_OK)) {
|
||||
if (unlikely(ret)) {
|
||||
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
|
||||
zram_stat64_inc(zram, &zram->stats.failed_reads);
|
||||
return ret;
|
||||
@ -305,13 +383,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
|
||||
ret = DECOMPRESS(cmem + sizeof(*zheader),
|
||||
xv_get_object_size(cmem) - sizeof(*zheader),
|
||||
mem, &clen);
|
||||
kunmap_atomic(cmem, KM_USER0);
|
||||
|
||||
/* Should NEVER happen. Return bio error if it does. */
|
||||
if (unlikely(ret != LZO_E_OK)) {
|
||||
if (unlikely(ret)) {
|
||||
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
|
||||
zram_stat64_inc(zram, &zram->stats.failed_reads);
|
||||
return ret;
|
||||
@ -377,18 +455,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
|
||||
COMPRESS(uncmem, PAGE_SIZE, src, &clen,
|
||||
zram->compress_workmem);
|
||||
|
||||
kunmap_atomic(user_mem, KM_USER0);
|
||||
if (is_partial_io(bvec))
|
||||
kfree(uncmem);
|
||||
|
||||
if (unlikely(ret != LZO_E_OK)) {
|
||||
pr_err("Compression failed! err=%d\n", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Page is incompressible. Store it as-is (uncompressed)
|
||||
* since we do not want to return too many disk write
|
||||
@ -648,7 +721,7 @@ int zram_init_device(struct zram *zram)
|
||||
|
||||
zram_set_disksize(zram, totalram_pages << PAGE_SHIFT);
|
||||
|
||||
zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
|
||||
zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL);
|
||||
if (!zram->compress_workmem) {
|
||||
pr_err("Error allocating compressor working memory!\n");
|
||||
ret = -ENOMEM;
|
||||
@ -753,6 +826,10 @@ static int create_device(struct zram *zram, int device_id)
|
||||
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
|
||||
set_capacity(zram->disk, 0);
|
||||
|
||||
/* Can be changed using sysfs (/sys/block/zram<id>/compressor) */
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
zram->compressor = zram_compressors[0];
|
||||
#endif
|
||||
/*
|
||||
* To ensure that we always get PAGE_SIZE aligned
|
||||
* and n*PAGE_SIZED sized I/O requests.
|
||||
|
@ -66,6 +66,13 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
|
||||
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
|
||||
(1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
|
||||
|
||||
#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) == 0
|
||||
#error At least one of CONFIG_ZRAM_LZO, CONFIG_ZRAM_SNAPPY must be defined!
|
||||
#endif
|
||||
#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) > 1
|
||||
#define MULTIPLE_COMPRESSORS
|
||||
#endif
|
||||
|
||||
/* Flags for zram pages (table[page_no].flags) */
|
||||
enum zram_pageflags {
|
||||
/* Page is stored uncompressed */
|
||||
@ -103,6 +110,9 @@ struct zram_stats {
|
||||
|
||||
struct zram {
|
||||
struct xv_pool *mem_pool;
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
const struct zram_compressor *compressor;
|
||||
#endif
|
||||
void *compress_workmem;
|
||||
void *compress_buffer;
|
||||
struct table *table;
|
||||
@ -132,5 +142,25 @@ extern struct attribute_group zram_disk_attr_group;
|
||||
extern int zram_init_device(struct zram *zram);
|
||||
extern void __zram_reset_device(struct zram *zram);
|
||||
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
struct zram_compressor {
|
||||
const char *name;
|
||||
int (*compress)(
|
||||
const unsigned char *src,
|
||||
size_t src_len,
|
||||
unsigned char *dst,
|
||||
size_t *dst_len,
|
||||
void *workmem);
|
||||
int (*decompress)(
|
||||
const unsigned char *src,
|
||||
size_t src_len,
|
||||
unsigned char *dst,
|
||||
size_t *dst_len);
|
||||
unsigned workmem_bytes;
|
||||
};
|
||||
|
||||
extern const struct zram_compressor * const zram_compressors[];
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -76,6 +76,57 @@ static ssize_t disksize_store(struct device *dev,
|
||||
return len;
|
||||
}
|
||||
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
static ssize_t compressor_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
char * const buf_base = buf;
|
||||
const struct zram_compressor *p, *curr;
|
||||
unsigned int i = 0;
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
curr = zram->compressor;
|
||||
p = zram_compressors[i];
|
||||
while (p) {
|
||||
if (curr == p)
|
||||
buf += sprintf(buf, "*");
|
||||
buf += sprintf(buf, "%u - %s\n", i, p->name);
|
||||
p = zram_compressors[++i];
|
||||
}
|
||||
return buf - buf_base;
|
||||
}
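/*
 * Example output of this attribute (illustration, not in the original
 * file), with both compressors built in and Snappy currently selected:
 *
 * 0 - LZO
 * *1 - SNAPPY
 */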
|
||||
|
||||
static ssize_t compressor_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t len)
|
||||
{
|
||||
const struct zram_compressor *p;
|
||||
unsigned long requested;
|
||||
unsigned int i = 0;
|
||||
int ret;
|
||||
struct zram *zram = dev_to_zram(dev);
|
||||
|
||||
if (zram->init_done) {
|
||||
pr_info("Cannot change compressor for initialized device\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
ret = strict_strtoul(buf, 10, &requested);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
p = zram_compressors[i];
|
||||
while (p && (i < requested))
|
||||
p = zram_compressors[++i];
|
||||
|
||||
if (!p) {
|
||||
pr_info("No compressor with index #%lu\n", requested);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
zram->compressor = p;
|
||||
return len;
|
||||
}
|
||||
#endif /* MULTIPLE_COMPRESSORS */
|
||||
|
||||
static ssize_t initstate_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
@ -194,6 +245,10 @@ static ssize_t mem_used_total_show(struct device *dev,
|
||||
return sprintf(buf, "%llu\n", val);
|
||||
}
|
||||
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
static DEVICE_ATTR(compressor, S_IRUGO | S_IWUSR,
|
||||
compressor_show, compressor_store);
|
||||
#endif
|
||||
static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
|
||||
disksize_show, disksize_store);
|
||||
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
|
||||
@ -208,6 +263,9 @@ static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
|
||||
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
|
||||
|
||||
static struct attribute *zram_disk_attrs[] = {
|
||||
#ifdef MULTIPLE_COMPRESSORS
|
||||
&dev_attr_compressor.attr,
|
||||
#endif
|
||||
&dev_attr_disksize.attr,
|
||||
&dev_attr_initstate.attr,
|
||||
&dev_attr_reset.attr,
|
||||
|