From 0d06b3f2b461deecdc72bed321c8bd62dbacb2cb Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 5 May 2012 00:19:05 +0800 Subject: [PATCH] drivers: staging: zram: added ZRAM support: /dev/zramX (X = 0, 1, ...). --- drivers/staging/Kconfig | 2 + drivers/staging/Makefile | 3 + drivers/staging/zram/Kconfig | 55 ++ drivers/staging/zram/Makefile | 4 + drivers/staging/zram/xvmalloc.c | 511 +++++++++++++++++ drivers/staging/zram/xvmalloc.h | 31 + drivers/staging/zram/xvmalloc_int.h | 96 ++++ drivers/staging/zram/zram.txt | 76 +++ drivers/staging/zram/zram_drv.c | 848 ++++++++++++++++++++++++++++ drivers/staging/zram/zram_drv.h | 136 +++++ drivers/staging/zram/zram_sysfs.c | 222 ++++++++ include/linux/blkdev.h | 2 + mm/swapfile.c | 3 + 13 files changed, 1989 insertions(+) create mode 100644 drivers/staging/zram/Kconfig create mode 100644 drivers/staging/zram/Makefile create mode 100644 drivers/staging/zram/xvmalloc.c create mode 100644 drivers/staging/zram/xvmalloc.h create mode 100644 drivers/staging/zram/xvmalloc_int.h create mode 100644 drivers/staging/zram/zram.txt create mode 100644 drivers/staging/zram/zram_drv.c create mode 100644 drivers/staging/zram/zram_drv.h create mode 100644 drivers/staging/zram/zram_sysfs.c diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index b926b00b..8ee4bfa6 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -123,5 +123,7 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" +source "drivers/staging/zram/Kconfig" + endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 3422f616..5a1b7341 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -43,3 +43,6 @@ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_RAR_REGISTER) += rar/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ +obj-$(CONFIG_ZRAM) += zram/ +obj-$(CONFIG_XVMALLOC) += zram/ + diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig new file mode 100644 index 00000000..18c1a971 --- /dev/null +++ b/drivers/staging/zram/Kconfig @@ -0,0 +1,55 @@ +config XVMALLOC + bool + default n + +config ZRAM + tristate "Compressed RAM block device support" + depends on BLOCK && SYSFS + select XVMALLOC + select LZO_COMPRESS + select LZO_DECOMPRESS + default n + help + Creates virtual block devices called /dev/zramX (X = 0, 1, ...). + Pages written to these disks are compressed and stored in memory + itself. These disks allow very fast I/O and compression provides + good amounts of memory savings. + + It has several use cases, for example: /tmp storage, use as swap + disks and maybe many more. + + See zram.txt for more information. + Project home: http://compcache.googlecode.com/ + +config ZRAM_NUM_DEVICES + int "Default number of zram devices" + depends on ZRAM + range 1 32 + default 1 + help + Select default number of zram devices. You can override this value + using 'num_devices' module parameter. + +config ZRAM_DEFAULT_PERCENTAGE + int "Default number of zram percentage" + depends on ZRAM + range 10 80 + default 25 + help + Select default zram disk size: percentage of total RAM + +config ZRAM_DEBUG + bool "Compressed RAM block device debug support" + depends on ZRAM + default n + help + This option adds additional debugging code to the compressed + RAM block device driver. 
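+
+# Illustrative only (hypothetical .config fragment): a four-device
+# setup whose disks default to 64MB each would combine the zram
+# options in this file as
+#
+#   CONFIG_ZRAM=y
+#   CONFIG_ZRAM_NUM_DEVICES=4
+#   CONFIG_ZRAM_DEFAULT_DISKSIZE=67108864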
+
+config ZRAM_DEFAULT_DISKSIZE
+	int "Default size of zram in bytes"
+	depends on ZRAM
+	default 100663296
+	help
+	  Set the default zram disk size in bytes (100663296 ~ 96MB).
+
diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile
new file mode 100644
index 00000000..2a6d3213
--- /dev/null
+++ b/drivers/staging/zram/Makefile
@@ -0,0 +1,4 @@
+zram-y	:=	zram_drv.o zram_sysfs.o
+
+obj-$(CONFIG_ZRAM)	+=	zram.o
+obj-$(CONFIG_XVMALLOC)	+=	xvmalloc.o
\ No newline at end of file
diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c
new file mode 100644
index 00000000..e03f5932
--- /dev/null
+++ b/drivers/staging/zram/xvmalloc.c
@@ -0,0 +1,511 @@
+/*
+ * xvmalloc memory allocator
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ */
+
+#ifdef CONFIG_ZRAM_DEBUG
+#define DEBUG
+#endif
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include "xvmalloc.h"
+#include "xvmalloc_int.h"
+
+static void stat_inc(u64 *value)
+{
+	*value = *value + 1;
+}
+
+static void stat_dec(u64 *value)
+{
+	*value = *value - 1;
+}
+
+static int test_flag(struct block_header *block, enum blockflags flag)
+{
+	return block->prev & BIT(flag);
+}
+
+static void set_flag(struct block_header *block, enum blockflags flag)
+{
+	block->prev |= BIT(flag);
+}
+
+static void clear_flag(struct block_header *block, enum blockflags flag)
+{
+	block->prev &= ~BIT(flag);
+}
+
+/*
+ * Given <page, offset> pair, provide a dereferenceable pointer.
+ * This is called from xv_malloc/xv_free path, so it
+ * needs to be fast.
+ */
+static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
+{
+	unsigned char *base;
+
+	base = kmap_atomic(page, type);
+	return base + offset;
+}
+
+static void put_ptr_atomic(void *ptr, enum km_type type)
+{
+	kunmap_atomic(ptr, type);
+}
+
+static u32 get_blockprev(struct block_header *block)
+{
+	return block->prev & PREV_MASK;
+}
+
+static void set_blockprev(struct block_header *block, u16 new_offset)
+{
+	block->prev = new_offset | (block->prev & FLAGS_MASK);
+}
+
+static struct block_header *BLOCK_NEXT(struct block_header *block)
+{
+	return (struct block_header *)
+		((char *)block + block->size + XV_ALIGN);
+}
+
+/*
+ * Get index of free list containing blocks of maximum size
+ * which is less than or equal to given size.
+ */
+static u32 get_index_for_insert(u32 size)
+{
+	if (unlikely(size > XV_MAX_ALLOC_SIZE))
+		size = XV_MAX_ALLOC_SIZE;
+	size &= ~FL_DELTA_MASK;
+	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
+}
+
+/*
+ * Get index of free list having blocks of size greater than
+ * or equal to requested size.
+ */
+static u32 get_index(u32 size)
+{
+	if (unlikely(size < XV_MIN_ALLOC_SIZE))
+		size = XV_MIN_ALLOC_SIZE;
+	size = ALIGN(size, FL_DELTA);
+	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
+}
+
+/**
+ * find_block - find block of at least given size
+ * @pool: memory pool to search from
+ * @size: size of block required
+ * @page: page containing required block
+ * @offset: offset within the page where block is located.
+ *
+ * Searches two level bitmap to locate block of at least
+ * the given size. If such a block is found, it provides
+ * <page, offset> to identify this block and returns index
+ * in freelist where we found this block.
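+ * (For illustration, assuming 4K pages and FL_DELTA = 8: a request for
+ * 100 bytes is first aligned to 104, so get_index() yields slindex
+ * (104 - 32) >> 3 = 9 and the search starts at that bit.)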
+ * Otherwise, returns 0 and <page, offset> params are not touched.
+ */
+static u32 find_block(struct xv_pool *pool, u32 size,
+			struct page **page, u32 *offset)
+{
+	ulong flbitmap, slbitmap;
+	u32 flindex, slindex, slbitstart;
+
+	/* There are no free blocks in this pool */
+	if (!pool->flbitmap)
+		return 0;
+
+	/* Get freelist index corresponding to this size */
+	slindex = get_index(size);
+	slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
+	slbitstart = slindex % BITS_PER_LONG;
+
+	/*
+	 * If freelist is not empty at this index, we found the
+	 * block - head of this list. This is approximate best-fit match.
+	 */
+	if (test_bit(slbitstart, &slbitmap)) {
+		*page = pool->freelist[slindex].page;
+		*offset = pool->freelist[slindex].offset;
+		return slindex;
+	}
+
+	/*
+	 * No best-fit found. Search a bit further in bitmap for a free block.
+	 * Second level bitmap consists of series of 32-bit chunks. Search
+	 * further in the chunk where we expected a best-fit, starting from
+	 * index location found above.
+	 */
+	slbitstart++;
+	slbitmap >>= slbitstart;
+
+	/* Skip this search if we were already at end of this bitmap chunk */
+	if ((slbitstart != BITS_PER_LONG) && slbitmap) {
+		slindex += __ffs(slbitmap) + 1;
+		*page = pool->freelist[slindex].page;
+		*offset = pool->freelist[slindex].offset;
+		return slindex;
+	}
+
+	/* Now do a full two-level bitmap search to find next nearest fit */
+	flindex = slindex / BITS_PER_LONG;
+
+	flbitmap = (pool->flbitmap) >> (flindex + 1);
+	if (!flbitmap)
+		return 0;
+
+	flindex += __ffs(flbitmap) + 1;
+	slbitmap = pool->slbitmap[flindex];
+	slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
+	*page = pool->freelist[slindex].page;
+	*offset = pool->freelist[slindex].offset;
+
+	return slindex;
+}
+
+/*
+ * Insert block at <page, offset> in freelist of given pool.
+ * freelist used depends on block size.
+ */
+static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
+			struct block_header *block)
+{
+	u32 flindex, slindex;
+	struct block_header *nextblock;
+
+	slindex = get_index_for_insert(block->size);
+	flindex = slindex / BITS_PER_LONG;
+
+	block->link.prev_page = NULL;
+	block->link.prev_offset = 0;
+	block->link.next_page = pool->freelist[slindex].page;
+	block->link.next_offset = pool->freelist[slindex].offset;
+	pool->freelist[slindex].page = page;
+	pool->freelist[slindex].offset = offset;
+
+	if (block->link.next_page) {
+		nextblock = get_ptr_atomic(block->link.next_page,
+					block->link.next_offset, KM_USER1);
+		nextblock->link.prev_page = page;
+		nextblock->link.prev_offset = offset;
+		put_ptr_atomic(nextblock, KM_USER1);
+		/* If there was a next page then the free bits are set. */
+		return;
+	}
+
+	__set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
+	__set_bit(flindex, &pool->flbitmap);
+}
+
+/*
+ * Remove block from freelist. Index 'slindex' identifies the freelist.
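+ * The block is unlinked from its doubly-linked freelist; if that list
+ * becomes empty, the matching second- and first-level bitmap bits are
+ * cleared so find_block() skips this size class.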
+ */ +static void remove_block(struct xv_pool *pool, struct page *page, u32 offset, + struct block_header *block, u32 slindex) +{ + u32 flindex = slindex / BITS_PER_LONG; + struct block_header *tmpblock; + + if (block->link.prev_page) { + tmpblock = get_ptr_atomic(block->link.prev_page, + block->link.prev_offset, KM_USER1); + tmpblock->link.next_page = block->link.next_page; + tmpblock->link.next_offset = block->link.next_offset; + put_ptr_atomic(tmpblock, KM_USER1); + } + + if (block->link.next_page) { + tmpblock = get_ptr_atomic(block->link.next_page, + block->link.next_offset, KM_USER1); + tmpblock->link.prev_page = block->link.prev_page; + tmpblock->link.prev_offset = block->link.prev_offset; + put_ptr_atomic(tmpblock, KM_USER1); + } + + /* Is this block is at the head of the freelist? */ + if (pool->freelist[slindex].page == page + && pool->freelist[slindex].offset == offset) { + + pool->freelist[slindex].page = block->link.next_page; + pool->freelist[slindex].offset = block->link.next_offset; + + if (pool->freelist[slindex].page) { + struct block_header *tmpblock; + tmpblock = get_ptr_atomic(pool->freelist[slindex].page, + pool->freelist[slindex].offset, + KM_USER1); + tmpblock->link.prev_page = NULL; + tmpblock->link.prev_offset = 0; + put_ptr_atomic(tmpblock, KM_USER1); + } else { + /* This freelist bucket is empty */ + __clear_bit(slindex % BITS_PER_LONG, + &pool->slbitmap[flindex]); + if (!pool->slbitmap[flindex]) + __clear_bit(flindex, &pool->flbitmap); + } + } + + block->link.prev_page = NULL; + block->link.prev_offset = 0; + block->link.next_page = NULL; + block->link.next_offset = 0; +} + +/* + * Allocate a page and add it to freelist of given pool. + */ +static int grow_pool(struct xv_pool *pool, gfp_t flags) +{ + struct page *page; + struct block_header *block; + + page = alloc_page(flags); + if (unlikely(!page)) + return -ENOMEM; + + stat_inc(&pool->total_pages); + + spin_lock(&pool->lock); + block = get_ptr_atomic(page, 0, KM_USER0); + + block->size = PAGE_SIZE - XV_ALIGN; + set_flag(block, BLOCK_FREE); + clear_flag(block, PREV_FREE); + set_blockprev(block, 0); + + insert_block(pool, page, 0, block); + + put_ptr_atomic(block, KM_USER0); + spin_unlock(&pool->lock); + + return 0; +} + +/* + * Create a memory pool. Allocates freelist, bitmaps and other + * per-pool metadata. + */ +struct xv_pool *xv_create_pool(void) +{ + u32 ovhd_size; + struct xv_pool *pool; + + ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); + pool = kzalloc(ovhd_size, GFP_KERNEL); + if (!pool) + return NULL; + + spin_lock_init(&pool->lock); + + return pool; +} +EXPORT_SYMBOL_GPL(xv_create_pool); + +void xv_destroy_pool(struct xv_pool *pool) +{ + kfree(pool); +} +EXPORT_SYMBOL_GPL(xv_destroy_pool); + +/** + * xv_malloc - Allocate block of given size from pool. + * @pool: pool to allocate from + * @size: size of block to allocate + * @page: page no. that holds the object + * @offset: location of object within page + * + * On success, identifies block allocated + * and 0 is returned. On failure, is set to + * 0 and -ENOMEM is returned. + * + * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail. 
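+ *
+ * A minimal calling sketch (hypothetical caller, error handling elided):
+ *
+ *	struct page *page;
+ *	u32 offset;
+ *
+ *	if (!xv_malloc(pool, len, &page, &offset, GFP_NOIO)) {
+ *		void *obj = kmap_atomic(page, KM_USER0) + offset;
+ *		... write up to len bytes at obj, then ...
+ *		kunmap_atomic(obj, KM_USER0);
+ *	}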
+ */
+int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
+		u32 *offset, gfp_t flags)
+{
+	int error;
+	u32 index, tmpsize, origsize, tmpoffset;
+	struct block_header *block, *tmpblock;
+
+	*page = NULL;
+	*offset = 0;
+	origsize = size;
+
+	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
+		return -ENOMEM;
+
+	size = ALIGN(size, XV_ALIGN);
+
+	spin_lock(&pool->lock);
+
+	index = find_block(pool, size, page, offset);
+
+	if (!*page) {
+		spin_unlock(&pool->lock);
+		if (flags & GFP_NOWAIT)
+			return -ENOMEM;
+		error = grow_pool(pool, flags);
+		if (unlikely(error))
+			return error;
+
+		spin_lock(&pool->lock);
+		index = find_block(pool, size, page, offset);
+	}
+
+	if (!*page) {
+		spin_unlock(&pool->lock);
+		return -ENOMEM;
+	}
+
+	block = get_ptr_atomic(*page, *offset, KM_USER0);
+
+	remove_block(pool, *page, *offset, block, index);
+
+	/* Split the block if required */
+	tmpoffset = *offset + size + XV_ALIGN;
+	tmpsize = block->size - size;
+	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
+	if (tmpsize) {
+		tmpblock->size = tmpsize - XV_ALIGN;
+		set_flag(tmpblock, BLOCK_FREE);
+		clear_flag(tmpblock, PREV_FREE);
+
+		set_blockprev(tmpblock, *offset);
+		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
+			insert_block(pool, *page, tmpoffset, tmpblock);
+
+		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
+			tmpblock = BLOCK_NEXT(tmpblock);
+			set_blockprev(tmpblock, tmpoffset);
+		}
+	} else {
+		/* This block is exact fit */
+		if (tmpoffset != PAGE_SIZE)
+			clear_flag(tmpblock, PREV_FREE);
+	}
+
+	block->size = origsize;
+	clear_flag(block, BLOCK_FREE);
+
+	put_ptr_atomic(block, KM_USER0);
+	spin_unlock(&pool->lock);
+
+	*offset += XV_ALIGN;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xv_malloc);
+
+/*
+ * Free block identified with <page, offset>
+ */
+void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
+{
+	void *page_start;
+	struct block_header *block, *tmpblock;
+
+	offset -= XV_ALIGN;
+
+	spin_lock(&pool->lock);
+
+	page_start = get_ptr_atomic(page, 0, KM_USER0);
+	block = (struct block_header *)((char *)page_start + offset);
+
+	/* Catch double free bugs */
+	BUG_ON(test_flag(block, BLOCK_FREE));
+
+	block->size = ALIGN(block->size, XV_ALIGN);
+
+	tmpblock = BLOCK_NEXT(block);
+	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
+		tmpblock = NULL;
+
+	/* Merge next block if it's free */
+	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
+		/*
+		 * Blocks smaller than XV_MIN_ALLOC_SIZE
+		 * are not inserted in any free list.
+		 */
+		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
+			remove_block(pool, page,
+				offset + block->size + XV_ALIGN, tmpblock,
+				get_index_for_insert(tmpblock->size));
+		}
+		block->size += tmpblock->size + XV_ALIGN;
+	}
+
+	/* Merge previous block if it's free */
+	if (test_flag(block, PREV_FREE)) {
+		tmpblock = (struct block_header *)((char *)(page_start) +
+						get_blockprev(block));
+		offset = offset - tmpblock->size - XV_ALIGN;
+
+		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
+			remove_block(pool, page, offset, tmpblock,
+				get_index_for_insert(tmpblock->size));
+
+		tmpblock->size += block->size + XV_ALIGN;
+		block = tmpblock;
+	}
+
+	/* No used objects in this page. Free it. */
+	if (block->size == PAGE_SIZE - XV_ALIGN) {
+		put_ptr_atomic(page_start, KM_USER0);
+		spin_unlock(&pool->lock);
+
+		__free_page(page);
+		stat_dec(&pool->total_pages);
+		return;
+	}
+
+	set_flag(block, BLOCK_FREE);
+	if (block->size >= XV_MIN_ALLOC_SIZE)
+		insert_block(pool, page, offset, block);
+
+	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
+		tmpblock = BLOCK_NEXT(block);
+		set_flag(tmpblock, PREV_FREE);
+		set_blockprev(tmpblock, offset);
+	}
+
+	put_ptr_atomic(page_start, KM_USER0);
+	spin_unlock(&pool->lock);
+}
+EXPORT_SYMBOL_GPL(xv_free);
+
+u32 xv_get_object_size(void *obj)
+{
+	struct block_header *blk;
+
+	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
+	return blk->size;
+}
+EXPORT_SYMBOL_GPL(xv_get_object_size);
+
+/*
+ * Returns total memory used by allocator (userdata + metadata)
+ */
+u64 xv_get_total_size_bytes(struct xv_pool *pool)
+{
+	return pool->total_pages << PAGE_SHIFT;
+}
+EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);
+
diff --git a/drivers/staging/zram/xvmalloc.h b/drivers/staging/zram/xvmalloc.h
new file mode 100644
index 00000000..f7875356
--- /dev/null
+++ b/drivers/staging/zram/xvmalloc.h
@@ -0,0 +1,31 @@
+/*
+ * xvmalloc memory allocator
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ */
+
+#ifndef _XV_MALLOC_H_
+#define _XV_MALLOC_H_
+
+#include <linux/types.h>
+
+struct xv_pool;
+
+struct xv_pool *xv_create_pool(void);
+void xv_destroy_pool(struct xv_pool *pool);
+
+int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
+		u32 *offset, gfp_t flags);
+void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
+
+u32 xv_get_object_size(void *obj);
+u64 xv_get_total_size_bytes(struct xv_pool *pool);
+
+#endif
+
diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h
new file mode 100644
index 00000000..273fe3cb
--- /dev/null
+++ b/drivers/staging/zram/xvmalloc_int.h
@@ -0,0 +1,96 @@
+/*
+ * xvmalloc memory allocator
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ */
+
+#ifndef _XV_MALLOC_INT_H_
+#define _XV_MALLOC_INT_H_
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/* User configurable params */
+
+/* Must be power of two */
+#ifdef CONFIG_64BIT
+#define XV_ALIGN_SHIFT	3
+#else
+#define XV_ALIGN_SHIFT	2
+#endif
+#define XV_ALIGN	(1 << XV_ALIGN_SHIFT)
+#define XV_ALIGN_MASK	(XV_ALIGN - 1)
+
+/* This must be greater than sizeof(link_free) */
+#define XV_MIN_ALLOC_SIZE	32
+#define XV_MAX_ALLOC_SIZE	(PAGE_SIZE - XV_ALIGN)
+
+/*
+ * Free lists are separated by FL_DELTA bytes.
+ * FL_DELTA_SHIFT is 4 for 64k pages; for any other page size,
+ * a conservative (PAGE_SHIFT - 9) is used.
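+ * For example, with 4K pages this gives FL_DELTA_SHIFT = 3, free lists
+ * spaced 8 bytes apart, and NUM_FREE_LISTS = 508.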
+ */
+#if PAGE_SHIFT == 16
+#define FL_DELTA_SHIFT	4
+#else
+#define FL_DELTA_SHIFT	(PAGE_SHIFT - 9)
+#endif
+#define FL_DELTA	(1 << FL_DELTA_SHIFT)
+#define FL_DELTA_MASK	(FL_DELTA - 1)
+#define NUM_FREE_LISTS	((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
+				/ FL_DELTA + 1)
+
+#define MAX_FLI		DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
+
+/* End of user params */
+
+enum blockflags {
+	BLOCK_FREE,
+	PREV_FREE,
+	__NR_BLOCKFLAGS,
+};
+
+#define FLAGS_MASK	XV_ALIGN_MASK
+#define PREV_MASK	(~FLAGS_MASK)
+
+struct freelist_entry {
+	struct page *page;
+	u16 offset;
+	u16 pad;
+};
+
+struct link_free {
+	struct page *prev_page;
+	struct page *next_page;
+	u16 prev_offset;
+	u16 next_offset;
+};
+
+struct block_header {
+	union {
+		/* This common header must be XV_ALIGN bytes */
+		u8 common[XV_ALIGN];
+		struct {
+			u16 size;
+			u16 prev;
+		};
+	};
+	struct link_free link;
+};
+
+struct xv_pool {
+	ulong flbitmap;
+	ulong slbitmap[MAX_FLI];
+	u64 total_pages;	/* stats */
+	struct freelist_entry freelist[NUM_FREE_LISTS];
+	spinlock_t lock;
+};
+
+#endif
+
diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt
new file mode 100644
index 00000000..5f75d298
--- /dev/null
+++ b/drivers/staging/zram/zram.txt
@@ -0,0 +1,76 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+
+Project home: http://compcache.googlecode.com/
+
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the use cases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+* Usage
+
+The following shows a typical sequence of steps for using zram.
+
+1) Load Module:
+	modprobe zram num_devices=4
+	This creates 4 devices: /dev/zram{0,1,2,3}
+	(num_devices parameter is optional. Default: 1)
+
+2) Set Disksize (Optional):
+	Set disk size by writing the value to sysfs node 'disksize'
+	(in bytes). If disksize is not given, the default size set by
+	CONFIG_ZRAM_DEFAULT_DISKSIZE (~96MB in this patch) is used.
+
+	# Initialize /dev/zram0 with 50MB disksize
+	echo $((50*1024*1024)) > /sys/block/zram0/disksize
+
+	NOTE: disksize cannot be changed if the disk contains any
+	data. So, for such a disk, you need to issue 'reset' (see below)
+	before you can change its disksize.
+
+3) Activate:
+	mkswap /dev/zram0
+	swapon /dev/zram0
+
+	mkfs.ext4 /dev/zram1
+	mount /dev/zram1 /tmp
+
+4) Stats:
+	Per-device statistics are exported as various nodes under
+	/sys/block/zram<id>/
+		disksize
+		num_reads
+		num_writes
+		invalid_io
+		notify_free
+		zero_pages
+		orig_data_size
+		compr_data_size
+		mem_used_total
+
+5) Deactivate:
+	swapoff /dev/zram0
+	umount /dev/zram1
+
+6) Reset:
+	Write any positive value to 'reset' sysfs node
+	echo 1 > /sys/block/zram0/reset
+	echo 1 > /sys/block/zram1/reset
+
+	(This frees all the memory allocated for the given device.)
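+
+Putting it together, a sketch of steps 1-3 for a single 256MB
+high-priority swap disk (the values here are only an example):
+
+	modprobe zram
+	echo $((256*1024*1024)) > /sys/block/zram0/disksize
+	mkswap /dev/zram0
+	swapon -p 10 /dev/zram0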
+
+
+Please report any problems at:
+ - Mailing list: linux-mm-cc at laptop dot org
+ - Issue tracker: http://code.google.com/p/compcache/issues/list
+
+Nitin Gupta
+ngupta@vflare.org
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
new file mode 100644
index 00000000..fc4c2e6f
--- /dev/null
+++ b/drivers/staging/zram/zram_drv.c
@@ -0,0 +1,848 @@
+/*
+ * Compressed RAM block device
+ *
+ * Copyright (C) 2008, 2009, 2010 Nitin Gupta
+ *
+ * This code is released using a dual license strategy: BSD/GPL
+ * You can choose the licence that better fits your requirements.
+ *
+ * Released under the terms of 3-clause BSD License
+ * Released under the terms of GNU General Public License Version 2.0
+ *
+ * Project home: http://compcache.googlecode.com
+ */
+
+#define KMSG_COMPONENT "zram"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#ifdef CONFIG_ZRAM_DEBUG
+#define DEBUG
+#endif
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bio.h>
+#include <linux/bitops.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/lzo.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include "zram_drv.h"
+
+/* Globals */
+static int zram_major;
+struct zram *zram_devices;
+
+/* Module params (documentation at end) */
+unsigned int zram_num_devices;
+
+static void zram_stat_inc(u32 *v)
+{
+	*v = *v + 1;
+}
+
+static void zram_stat_dec(u32 *v)
+{
+	*v = *v - 1;
+}
+
+static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc)
+{
+	spin_lock(&zram->stat64_lock);
+	*v = *v + inc;
+	spin_unlock(&zram->stat64_lock);
+}
+
+static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec)
+{
+	spin_lock(&zram->stat64_lock);
+	*v = *v - dec;
+	spin_unlock(&zram->stat64_lock);
+}
+
+static void zram_stat64_inc(struct zram *zram, u64 *v)
+{
+	zram_stat64_add(zram, v, 1);
+}
+
+static int zram_test_flag(struct zram *zram, u32 index,
+			enum zram_pageflags flag)
+{
+	return zram->table[index].flags & BIT(flag);
+}
+
+static void zram_set_flag(struct zram *zram, u32 index,
+			enum zram_pageflags flag)
+{
+	zram->table[index].flags |= BIT(flag);
+}
+
+static void zram_clear_flag(struct zram *zram, u32 index,
+			enum zram_pageflags flag)
+{
+	zram->table[index].flags &= ~BIT(flag);
+}
+
+static int page_zero_filled(void *ptr)
+{
+	unsigned int pos;
+	unsigned long *page;
+
+	page = (unsigned long *)ptr;
+
+	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
+		if (page[pos])
+			return 0;
+	}
+
+	return 1;
+}
+
+static u64 zram_default_disksize_bytes(void)
+{
+#if 0
+	return ((totalram_pages << PAGE_SHIFT) *
+		default_disksize_perc_ram / 100) & PAGE_MASK;
+#endif
+	return CONFIG_ZRAM_DEFAULT_DISKSIZE;
+}
+
+static void zram_set_disksize(struct zram *zram, u64 size_bytes)
+{
+	zram->disksize = size_bytes;
+	set_capacity(zram->disk, size_bytes >> SECTOR_SHIFT);
+}
+
+static void zram_free_page(struct zram *zram, size_t index)
+{
+	u32 clen;
+	void *obj;
+
+	struct page *page = zram->table[index].page;
+	u32 offset = zram->table[index].offset;
+
+	if (unlikely(!page)) {
+		/*
+		 * No memory is allocated for zero filled pages.
+		 * Simply clear zero page flag.
+ */ + if (zram_test_flag(zram, index, ZRAM_ZERO)) { + zram_clear_flag(zram, index, ZRAM_ZERO); + zram_stat_dec(&zram->stats.pages_zero); + } + return; + } + + if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { + clen = PAGE_SIZE; + __free_page(page); + zram_clear_flag(zram, index, ZRAM_UNCOMPRESSED); + zram_stat_dec(&zram->stats.pages_expand); + goto out; + } + + obj = kmap_atomic(page, KM_USER0) + offset; + clen = xv_get_object_size(obj) - sizeof(struct zobj_header); + kunmap_atomic(obj, KM_USER0); + + xv_free(zram->mem_pool, page, offset); + if (clen <= PAGE_SIZE / 2) + zram_stat_dec(&zram->stats.good_compress); + +out: + zram_stat64_sub(zram, &zram->stats.compr_size, clen); + zram_stat_dec(&zram->stats.pages_stored); + + zram->table[index].page = NULL; + zram->table[index].offset = 0; +} + +static void handle_zero_page(struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + void *user_mem; + + user_mem = kmap_atomic(page, KM_USER0); + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + kunmap_atomic(user_mem, KM_USER0); + + flush_dcache_page(page); +} + +static void handle_uncompressed_page(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset) +{ + struct page *page = bvec->bv_page; + unsigned char *user_mem, *cmem; + + user_mem = kmap_atomic(page, KM_USER0); + cmem = kmap_atomic(zram->table[index].page, KM_USER1); + + memcpy(user_mem + bvec->bv_offset, cmem + offset, bvec->bv_len); + kunmap_atomic(cmem, KM_USER1); + kunmap_atomic(user_mem, KM_USER0); + + flush_dcache_page(page); +} + +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ + int ret; + size_t clen; + struct page *page; + struct zobj_header *zheader; + unsigned char *user_mem, *cmem, *uncmem = NULL; + + page = bvec->bv_page; + + if (zram_test_flag(zram, index, ZRAM_ZERO)) { + handle_zero_page(bvec); + return 0; + } + + /* Requested page is not present in compressed area */ + if (unlikely(!zram->table[index].page)) { + pr_debug("Read before write: sector=%lu, size=%u", + (ulong)(bio->bi_sector), bio->bi_size); + handle_zero_page(bvec); + return 0; + } + + /* Page is stored uncompressed since it's incompressible */ + if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { + handle_uncompressed_page(zram, bvec, index, offset); + return 0; + } + + if (is_partial_io(bvec)) { + /* Use a temporary buffer to decompress the page */ + uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!uncmem) { + pr_info("Error allocating temp memory!\n"); + return -ENOMEM; + } + } + + user_mem = kmap_atomic(page, KM_USER0); + if (!is_partial_io(bvec)) + uncmem = user_mem; + clen = PAGE_SIZE; + + cmem = kmap_atomic(zram->table[index].page, KM_USER1) + + zram->table[index].offset; + + ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + xv_get_object_size(cmem) - sizeof(*zheader), + uncmem, &clen); + + if (is_partial_io(bvec)) { + memcpy(user_mem + bvec->bv_offset, uncmem + offset, + bvec->bv_len); + kfree(uncmem); + } + + kunmap_atomic(cmem, KM_USER1); + kunmap_atomic(user_mem, KM_USER0); + + /* Should NEVER happen. Return bio error if it does. */ + if (unlikely(ret != LZO_E_OK)) { + pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); + zram_stat64_inc(zram, &zram->stats.failed_reads); + return ret; + } + + flush_dcache_page(page); + + return 0; +} + +static int zram_read_before_write(struct zram *zram, char *mem, u32 index) +{ + int ret; + size_t clen = PAGE_SIZE; + struct zobj_header *zheader; + unsigned char *cmem; + + if (zram_test_flag(zram, index, ZRAM_ZERO) || + !zram->table[index].page) { + memset(mem, 0, PAGE_SIZE); + return 0; + } + + cmem = kmap_atomic(zram->table[index].page, KM_USER0) + + zram->table[index].offset; + + /* Page is stored uncompressed since it's incompressible */ + if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { + memcpy(mem, cmem, PAGE_SIZE); + kunmap_atomic(cmem, KM_USER0); + return 0; + } + + ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + xv_get_object_size(cmem) - sizeof(*zheader), + mem, &clen); + kunmap_atomic(cmem, KM_USER0); + + /* Should NEVER happen. Return bio error if it does. */ + if (unlikely(ret != LZO_E_OK)) { + pr_err("Decompression failed! err=%d, page=%u\n", ret, index); + zram_stat64_inc(zram, &zram->stats.failed_reads); + return ret; + } + + return 0; +} + +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, + int offset) +{ + int ret; + u32 store_offset; + size_t clen; + struct zobj_header *zheader; + struct page *page, *page_store; + unsigned char *user_mem, *cmem, *src, *uncmem = NULL; + + page = bvec->bv_page; + src = zram->compress_buffer; + + if (is_partial_io(bvec)) { + /* + * This is a partial IO. We need to read the full page + * before to write the changes. + */ + uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!uncmem) { + pr_info("Error allocating temp memory!\n"); + ret = -ENOMEM; + goto out; + } + ret = zram_read_before_write(zram, uncmem, index); + if (ret) { + kfree(uncmem); + goto out; + } + } + + /* + * System overwrites unused sectors. Free memory associated + * with this sector now. + */ + if (zram->table[index].page || + zram_test_flag(zram, index, ZRAM_ZERO)) + zram_free_page(zram, index); + + user_mem = kmap_atomic(page, KM_USER0); + + if (is_partial_io(bvec)) + memcpy(uncmem + offset, user_mem + bvec->bv_offset, + bvec->bv_len); + else + uncmem = user_mem; + + if (page_zero_filled(uncmem)) { + kunmap_atomic(user_mem, KM_USER0); + if (is_partial_io(bvec)) + kfree(uncmem); + zram_stat_inc(&zram->stats.pages_zero); + zram_set_flag(zram, index, ZRAM_ZERO); + ret = 0; + goto out; + } + + ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + zram->compress_workmem); + + kunmap_atomic(user_mem, KM_USER0); + if (is_partial_io(bvec)) + kfree(uncmem); + + if (unlikely(ret != LZO_E_OK)) { + pr_err("Compression failed! err=%d\n", ret); + goto out; + } + + /* + * Page is incompressible. Store it as-is (uncompressed) + * since we do not want to return too many disk write + * errors which has side effect of hanging the system. 
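+	 * With 4K pages, max_zpage_size is 3072 bytes (PAGE_SIZE / 4 * 3;
+	 * see zram_drv.h), so a page whose LZO output exceeds that is
+	 * simply copied into its own full page below.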
+ */ + if (unlikely(clen > max_zpage_size)) { + clen = PAGE_SIZE; + page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); + if (unlikely(!page_store)) { + pr_info("Error allocating memory for " + "incompressible page: %u\n", index); + ret = -ENOMEM; + goto out; + } + + store_offset = 0; + zram_set_flag(zram, index, ZRAM_UNCOMPRESSED); + zram_stat_inc(&zram->stats.pages_expand); + zram->table[index].page = page_store; + src = kmap_atomic(page, KM_USER0); + goto memstore; + } + + if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader), + &zram->table[index].page, &store_offset, + GFP_NOIO | __GFP_HIGHMEM)) { + pr_info("Error allocating memory for compressed " + "page: %u, size=%zu\n", index, clen); + ret = -ENOMEM; + goto out; + } + +memstore: + zram->table[index].offset = store_offset; + + cmem = kmap_atomic(zram->table[index].page, KM_USER1) + + zram->table[index].offset; + +#if 0 + /* Back-reference needed for memory defragmentation */ + if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) { + zheader = (struct zobj_header *)cmem; + zheader->table_idx = index; + cmem += sizeof(*zheader); + } +#endif + + memcpy(cmem, src, clen); + + kunmap_atomic(cmem, KM_USER1); + if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) + kunmap_atomic(src, KM_USER0); + + /* Update stats */ + zram_stat64_add(zram, &zram->stats.compr_size, clen); + zram_stat_inc(&zram->stats.pages_stored); + if (clen <= PAGE_SIZE / 2) + zram_stat_inc(&zram->stats.good_compress); + + return 0; + +out: + if (ret) + zram_stat64_inc(zram, &zram->stats.failed_writes); + return ret; +} + +static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, + int offset, struct bio *bio, int rw) +{ + int ret; + + if (rw == READ) { + down_read(&zram->lock); + ret = zram_bvec_read(zram, bvec, index, offset, bio); + up_read(&zram->lock); + } else { + down_write(&zram->lock); + ret = zram_bvec_write(zram, bvec, index, offset); + up_write(&zram->lock); + } + + return ret; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; +} + +static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) +{ + int i, offset; + u32 index; + struct bio_vec *bvec; + + switch (rw) { + case READ: + zram_stat64_inc(zram, &zram->stats.num_reads); + break; + case WRITE: + zram_stat64_inc(zram, &zram->stats.num_writes); + break; + } + + index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; + offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + + bio_for_each_segment(bvec, bio, i) { + int max_transfer_size = PAGE_SIZE - offset; + + if (bvec->bv_len > max_transfer_size) { + /* + * zram_bvec_rw() can only make operation on a single + * zram page. Split the bio vector. + */ + struct bio_vec bv; + + bv.bv_page = bvec->bv_page; + bv.bv_len = max_transfer_size; + bv.bv_offset = bvec->bv_offset; + + if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) + goto out; + + bv.bv_len = bvec->bv_len - max_transfer_size; + bv.bv_offset += max_transfer_size; + if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) + goto out; + } else + if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) + < 0) + goto out; + + update_position(&index, &offset, bvec); + } + + set_bit(BIO_UPTODATE, &bio->bi_flags); + bio_endio(bio, 0); + return; + +out: + bio_io_error(bio); +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. 
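+ * For example, with the 4K logical blocks used here, a bio starting at
+ * sector 7 or carrying only 512 bytes of data is rejected, while a 4K
+ * bio starting at sector 8 (and within disksize) passes.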
+ */ +static inline int valid_io_request(struct zram *zram, struct bio *bio) +{ + if (unlikely( + (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) || + (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) || + (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) { + + return 0; + } + + /* I/O request is valid */ + return 1; +} + +/* + * Handler function for all zram I/O requests. + */ +static int zram_make_request(struct request_queue *queue, struct bio *bio) +{ + struct zram *zram = queue->queuedata; + + if (!valid_io_request(zram, bio)) { + zram_stat64_inc(zram, &zram->stats.invalid_io); + bio_io_error(bio); + return 0; + } + + if (unlikely(!zram->init_done) && zram_init_device(zram)) { + bio_io_error(bio); + return 0; + } + + __zram_make_request(zram, bio, bio_data_dir(bio)); + + return 0; +} + +void zram_reset_device(struct zram *zram) +{ + size_t index; + + mutex_lock(&zram->init_lock); + zram->init_done = 0; + + /* Free various per-device buffers */ + kfree(zram->compress_workmem); + free_pages((unsigned long)zram->compress_buffer, 1); + + zram->compress_workmem = NULL; + zram->compress_buffer = NULL; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { + struct page *page; + u16 offset; + + page = zram->table[index].page; + offset = zram->table[index].offset; + + if (!page) + continue; + + if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) + __free_page(page); + else + xv_free(zram->mem_pool, page, offset); + } + + vfree(zram->table); + zram->table = NULL; + + xv_destroy_pool(zram->mem_pool); + zram->mem_pool = NULL; + + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + + zram_set_disksize(zram, zram_default_disksize_bytes()); + mutex_unlock(&zram->init_lock); +} + +int zram_init_device(struct zram *zram) +{ + int ret; + size_t num_pages; + + mutex_lock(&zram->init_lock); + + if (zram->init_done) { + mutex_unlock(&zram->init_lock); + return 0; + } + + zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!zram->compress_workmem) { + pr_err("Error allocating compressor working memory!\n"); + ret = -ENOMEM; + goto fail; + } + + zram->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); + if (!zram->compress_buffer) { + pr_err("Error allocating compressor buffer space\n"); + ret = -ENOMEM; + goto fail; + } + + num_pages = zram->disksize >> PAGE_SHIFT; + zram->table = vmalloc(num_pages * sizeof(*zram->table)); + if (!zram->table) { + pr_err("Error allocating zram address table\n"); + /* To prevent accessing table entries during cleanup */ + zram->disksize = 0; + ret = -ENOMEM; + goto fail; + } + memset(zram->table, 0, num_pages * sizeof(*zram->table)); + + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + + zram->mem_pool = xv_create_pool(); + if (!zram->mem_pool) { + pr_err("Error creating memory pool\n"); + ret = -ENOMEM; + goto fail; + } + + zram->init_done = 1; + mutex_unlock(&zram->init_lock); + + pr_debug("Initialization done!\n"); + return 0; + +fail: + mutex_unlock(&zram->init_lock); + zram_reset_device(zram); + + pr_err("Initialization failed: err=%d\n", ret); + return ret; +} + +static void zram_slot_free_notify(struct block_device *bdev, + unsigned long index) +{ + struct zram *zram; + + zram = bdev->bd_disk->private_data; + zram_free_page(zram, index); + zram_stat64_inc(zram, &zram->stats.notify_free); +} + +static const struct block_device_operations zram_devops = { + 
.swap_slot_free_notify = zram_slot_free_notify, + .owner = THIS_MODULE +}; + +static int create_device(struct zram *zram, int device_id) +{ + int ret = 0; + + init_rwsem(&zram->lock); + mutex_init(&zram->init_lock); + spin_lock_init(&zram->stat64_lock); + + zram->queue = blk_alloc_queue(GFP_KERNEL); + if (!zram->queue) { + pr_err("Error allocating disk queue for device %d\n", + device_id); + ret = -ENOMEM; + goto out; + } + + blk_queue_make_request(zram->queue, zram_make_request); + zram->queue->queuedata = zram; + + /* gendisk structure */ + zram->disk = alloc_disk(1); + if (!zram->disk) { + blk_cleanup_queue(zram->queue); + pr_warning("Error allocating disk structure for device %d\n", + device_id); + ret = -ENOMEM; + goto out; + } + + zram->disk->major = zram_major; + zram->disk->first_minor = device_id; + zram->disk->fops = &zram_devops; + zram->disk->queue = zram->queue; + zram->disk->private_data = zram; + snprintf(zram->disk->disk_name, 16, "zram%d", device_id); + + /* + * Set some default disksize. To set another disksize, user + * must reset the device and then write a new disksize to + * corresponding device's sysfs node. + */ + zram_set_disksize(zram, zram_default_disksize_bytes()); + + /* + * To ensure that we always get PAGE_SIZE aligned + * and n*PAGE_SIZED sized I/O requests. + */ + blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); + blk_queue_logical_block_size(zram->disk->queue, + ZRAM_LOGICAL_BLOCK_SIZE); + blk_queue_io_min(zram->disk->queue, PAGE_SIZE); + blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); + + add_disk(zram->disk); + + ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, + &zram_disk_attr_group); + if (ret < 0) { + pr_warning("Error creating sysfs group"); + goto out; + } + + zram->init_done = 0; + +out: + return ret; +} + +static void destroy_device(struct zram *zram) +{ + sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, + &zram_disk_attr_group); + + if (zram->disk) { + del_gendisk(zram->disk); + put_disk(zram->disk); + } + + if (zram->queue) + blk_cleanup_queue(zram->queue); +} + +static int __init zram_init(void) +{ + int ret, dev_id; + + /* + * Module parameter not specified by user. Use default + * value as defined during kernel config. 
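+	 * For example, "modprobe zram num_devices=4" creates
+	 * /dev/zram0../dev/zram3, while a bare "modprobe zram" falls
+	 * back to CONFIG_ZRAM_NUM_DEVICES.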
+ */ + if (zram_num_devices == 0) + zram_num_devices = CONFIG_ZRAM_NUM_DEVICES; + + if (zram_num_devices > max_num_devices) { + pr_warning("Invalid value for num_devices: %u\n", + zram_num_devices); + ret = -EINVAL; + goto out; + } + + zram_major = register_blkdev(0, "zram"); + if (zram_major <= 0) { + pr_warning("Unable to get major number\n"); + ret = -EBUSY; + goto out; + } + + /* Allocate the device array and initialize each one */ + pr_info("Creating %u devices ...\n", zram_num_devices); + zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), + GFP_KERNEL); + if (!zram_devices) + { + ret = -ENOMEM; + goto unregister; + } + + for (dev_id = 0; dev_id < zram_num_devices; dev_id++) { + ret = create_device(&zram_devices[dev_id], dev_id); + if (ret) + goto free_devices; + } + + return 0; + +free_devices: + while (dev_id) + destroy_device(&zram_devices[--dev_id]); + kfree(zram_devices); +unregister: + unregister_blkdev(zram_major, "zram"); +out: + return ret; +} + +static void __exit zram_exit(void) +{ + int i; + struct zram *zram; + + for (i = 0; i < zram_num_devices; i++) { + zram = &zram_devices[i]; + + destroy_device(zram); + if (zram->init_done) + zram_reset_device(zram); + } + + unregister_blkdev(zram_major, "zram"); + + kfree(zram_devices); + pr_debug("Cleanup done!\n"); +} + +module_param_named(num_devices, zram_num_devices, uint, 0); +MODULE_PARM_DESC(num_devices, "Number of zram devices"); + +module_init(zram_init); +module_exit(zram_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Nitin Gupta "); +MODULE_DESCRIPTION("Compressed RAM Block Device"); + diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h new file mode 100644 index 00000000..fed0d14b --- /dev/null +++ b/drivers/staging/zram/zram_drv.h @@ -0,0 +1,136 @@ +/* + * Compressed RAM block device + * + * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * + * This code is released using a dual license strategy: BSD/GPL + * You can choose the licence that better fits your requirements. + * + * Released under the terms of 3-clause BSD License + * Released under the terms of GNU General Public License Version 2.0 + * + * Project home: http://compcache.googlecode.com + */ + +#ifndef _ZRAM_DRV_H_ +#define _ZRAM_DRV_H_ + +#include +#include + +#include "xvmalloc.h" + +/* + * Some arbitrary value. This is just to catch + * invalid value for num_devices module parameter. + */ +static const unsigned max_num_devices = 32; + +/* + * Stored at beginning of each compressed object. + * + * It stores back-reference to table entry which points to this + * object. This is required to support memory defragmentation. + */ +struct zobj_header { +#if 0 + u32 table_idx; +#endif +}; + +/*-- Configurable parameters */ + +/* Default zram disk size: 25% of total RAM */ +static const unsigned default_disksize_perc_ram = CONFIG_ZRAM_DEFAULT_PERCENTAGE; + +/* + * Pages that compress to size greater than this are stored + * uncompressed in memory. + */ +static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; + +/* + * NOTE: max_zpage_size must be less than or equal to: + * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header) + * otherwise, xv_malloc() would always return failure. 
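+ * With 4K pages this allows objects of up to 3072 bytes, safely below
+ * XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN).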
+ */ + +/*-- End of configurable params */ + +#define SECTOR_SHIFT 9 +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) +#define ZRAM_LOGICAL_BLOCK_SHIFT 12 +#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT) +#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ + (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) + +/* Flags for zram pages (table[page_no].flags) */ +enum zram_pageflags { + /* Page is stored uncompressed */ + ZRAM_UNCOMPRESSED, + + /* Page consists entirely of zeros */ + ZRAM_ZERO, + + __NR_ZRAM_PAGEFLAGS, +}; + +/*-- Data structures */ + +/* Allocated for each disk page */ +struct table { + struct page *page; + u16 offset; + u8 count; /* object ref count (not yet used) */ + u8 flags; +} __attribute__((aligned(4))); + +struct zram_stats { + u64 compr_size; /* compressed size of pages stored */ + u64 num_reads; /* failed + successful */ + u64 num_writes; /* --do-- */ + u64 failed_reads; /* should NEVER! happen */ + u64 failed_writes; /* can happen when memory is too low */ + u64 invalid_io; /* non-page-aligned I/O requests */ + u64 notify_free; /* no. of swap slot free notifications */ + u32 pages_zero; /* no. of zero filled pages */ + u32 pages_stored; /* no. of pages currently stored */ + u32 good_compress; /* % of pages with compression ratio<=50% */ + u32 pages_expand; /* % of incompressible pages */ +}; + +struct zram { + struct xv_pool *mem_pool; + void *compress_workmem; + void *compress_buffer; + struct table *table; + spinlock_t stat64_lock; /* protect 64-bit stats */ + struct rw_semaphore lock; /* protect compression buffers and table + * against concurrent read and writes */ + struct request_queue *queue; + struct gendisk *disk; + int init_done; + /* Prevent concurrent execution of device init and reset */ + struct mutex init_lock; + /* + * This is the limit on amount of *uncompressed* worth of data + * we can store in a disk. + */ + u64 disksize; /* bytes */ + + struct zram_stats stats; +}; + +extern struct zram *zram_devices; +extern unsigned int zram_num_devices; +#ifdef CONFIG_SYSFS +extern struct attribute_group zram_disk_attr_group; +#endif + +extern int zram_init_device(struct zram *zram); +extern void zram_reset_device(struct zram *zram); + +#endif + diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c new file mode 100644 index 00000000..d894928a --- /dev/null +++ b/drivers/staging/zram/zram_sysfs.c @@ -0,0 +1,222 @@ +/* + * Compressed RAM block device + * + * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * + * This code is released using a dual license strategy: BSD/GPL + * You can choose the licence that better fits your requirements. 
+ * + * Released under the terms of 3-clause BSD License + * Released under the terms of GNU General Public License Version 2.0 + * + * Project home: http://compcache.googlecode.com/ + */ + +#include +#include +#include + +#include "zram_drv.h" + +static u64 zram_stat64_read(struct zram *zram, u64 *v) +{ + u64 val; + + spin_lock(&zram->stat64_lock); + val = *v; + spin_unlock(&zram->stat64_lock); + + return val; +} + +static struct zram *dev_to_zram(struct device *dev) +{ + int i; + struct zram *zram = NULL; + + for (i = 0; i < zram_num_devices; i++) { + zram = &zram_devices[i]; + if (disk_to_dev(zram->disk) == dev) + break; + } + + return zram; +} + +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", zram->disksize); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + struct zram *zram = dev_to_zram(dev); + + if (zram->init_done) { + pr_info("Cannot change disksize for initialized device\n"); + return -EBUSY; + } + + ret = strict_strtoull(buf, 10, &zram->disksize); + if (ret) + return ret; + + zram->disksize = PAGE_ALIGN(zram->disksize); + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + + return len; +} + +static ssize_t initstate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->init_done); +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned long do_reset; + struct zram *zram; + struct block_device *bdev; + + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); + + /* Do not reset an active device! 
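+	 * bd_holders stays non-zero while the device is mounted or claimed
+	 * for swap, so a busy device is rejected here.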
*/ + if (bdev->bd_holders) + return -EBUSY; + + ret = strict_strtoul(buf, 10, &do_reset); + if (ret) + return ret; + + if (!do_reset) + return -EINVAL; + + /* Make sure all pending I/O is finished */ + if (bdev) + fsync_bdev(bdev); + + if (zram->init_done) + zram_reset_device(zram); + + return len; +} + +static ssize_t num_reads_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + zram_stat64_read(zram, &zram->stats.num_reads)); +} + +static ssize_t num_writes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + zram_stat64_read(zram, &zram->stats.num_writes)); +} + +static ssize_t invalid_io_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + zram_stat64_read(zram, &zram->stats.invalid_io)); +} + +static ssize_t notify_free_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + zram_stat64_read(zram, &zram->stats.notify_free)); +} + +static ssize_t zero_pages_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->stats.pages_zero); +} + +static ssize_t orig_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)(zram->stats.pages_stored) << PAGE_SHIFT); +} + +static ssize_t compr_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + zram_stat64_read(zram, &zram->stats.compr_size)); +} + +static ssize_t mem_used_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + + if (zram->init_done) { + val = xv_get_total_size_bytes(zram->mem_pool) + + ((u64)(zram->stats.pages_expand) << PAGE_SHIFT); + } + + return sprintf(buf, "%llu\n", val); +} + +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, + disksize_show, disksize_store); +static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); +static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); +static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); +static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); +static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); +static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); +static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); +static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); +static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); + +static struct attribute *zram_disk_attrs[] = { + &dev_attr_disksize.attr, + &dev_attr_initstate.attr, + &dev_attr_reset.attr, + &dev_attr_num_reads.attr, + &dev_attr_num_writes.attr, + &dev_attr_invalid_io.attr, + &dev_attr_notify_free.attr, + &dev_attr_zero_pages.attr, + &dev_attr_orig_data_size.attr, + &dev_attr_compr_data_size.attr, + &dev_attr_mem_used_total.attr, + NULL, +}; + +struct attribute_group zram_disk_attr_group = { + .attrs = zram_disk_attrs, +}; + diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 912b8ff3..cb3b092c 100644 --- 
a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1274,6 +1274,8 @@ struct block_device_operations { unsigned long long); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + /* this callback is with swap_lock and often page lock also held */ + void (*swap_slot_free_notify) (struct block_device *, unsigned long); struct module *owner; }; diff --git a/mm/swapfile.c b/mm/swapfile.c index 9c590eef..dd026db6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -577,6 +577,7 @@ static int swap_entry_free(struct swap_info_struct *p, count = p->swap_map[offset]; /* free if no reference */ if (!count) { + struct gendisk *disk = p->bdev->bd_disk; if (offset < p->lowest_bit) p->lowest_bit = offset; if (offset > p->highest_bit) @@ -585,6 +586,8 @@ static int swap_entry_free(struct swap_info_struct *p, swap_list.next = p - swap_info; nr_swap_pages++; p->inuse_pages--; + if (disk->fops->swap_slot_free_notify) + disk->fops->swap_slot_free_notify(p->bdev, offset); } if (!swap_count(count)) mem_cgroup_uncharge_swap(ent);