diff --git a/Makefile b/Makefile
index 958cbb0f..acde5126 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,6 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 32
-EXTRAVERSION = -ics
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
diff --git a/README b/README
index 5d014817..35d18fb0 100644
--- a/README
+++ b/README
@@ -20,19 +20,18 @@ Primary features:
 - Two-way call recording (Credits: avs333, snq-, and tytung)
 - T-Mobile Wi-Fi Calling (Credits: tytung)
 - Wi-Fi IEEE 802.1x/EAP authentication (Credits: tytung)
-- Native USB Tethering (for Gingerbread) (Credits: tytung)
+- Native USB Tethering (Credits: tytung)
 - Native Wi-Fi Tethering (Credits: tytung)
-- Real Wi-Fi MAC address (only for SD build on WinMo 6.5) (Credits: savan and tytung)
-- Unique Wi-Fi MAC address (for MAGLDR and cLK) (Credits: markinus)
-- Unique Bluetooth MAC address (Credits: markinus and tytung)
 - Official HTC extended battery support (HTC EB 2300mAh) (Credits: arne)
 - ALSA sound driver as kernel modules (alsa-pcm-htc-leo.ko and alsa-mix-htc-leo.ko) (Credits: cotulla)
 - Wired headphones support for ICS. (Credits: zivan56)
 - Backported xt_qtaguid and xt_quota2 to support data usage for ICS. (Credits: tytung)
 - Improved Flashlight compatibility for ICS. (Credits: tytung)
 - Backported the GPU driver to enable the Hardware Acceleration for ICS. (Credits: Securecrt and Rick_1995)
+- Updated to msm-kgsl3d0 v3.8 to match the latest QCOM Adreno200 drivers for ICS. (Credits: Rick_1995)
+- Real WiFi and Bluetooth MAC addresses. (Credits: Franck78, Rick_1995 and Marc1706)
 
-Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, other devs, and testers.
+Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, Franck78, other devs, and testers.
=============================================================================== diff --git a/arch/arm/Makefile b/arch/arm/Makefile index a73caaf6..f35b1588 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ endif OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -#KBUILD_CFLAGS +=-pipe +KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index a567f969..220270c5 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sat May 12 16:06:22 CST 2012 +# Sun Oct 14 20:28:45 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -604,11 +604,10 @@ CONFIG_NETFILTER_XT_CONNMARK=y # CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y -# CONFIG_NETFILTER_XT_TARGET_CT is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set CONFIG_NETFILTER_XT_TARGET_MARK=y -# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set +CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y # CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set @@ -632,7 +631,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y CONFIG_NETFILTER_XT_MATCH_HELPER=y -CONFIG_NETFILTER_XT_MATCH_HL=y +# CONFIG_NETFILTER_XT_MATCH_HL is not set CONFIG_NETFILTER_XT_MATCH_IPRANGE=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MATCH_LIMIT=y @@ -709,8 +708,21 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set -# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set @@ -1681,10 +1693,14 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_XVMALLOC=y CONFIG_ZRAM=y -CONFIG_ZRAM_NUM_DEVICES=3 +CONFIG_ZRAM_NUM_DEVICES=1 CONFIG_ZRAM_DEFAULT_PERCENTAGE=18 # CONFIG_ZRAM_DEBUG is not set CONFIG_ZRAM_DEFAULT_DISKSIZE=100000000 +# CONFIG_ZRAM_LZO is not set +CONFIG_ZRAM_SNAPPY=y +CONFIG_SNAPPY_COMPRESS=y +CONFIG_SNAPPY_DECOMPRESS=y # # File systems diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c old mode 100644 new mode 100755 index 948af5cd..4913f043 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -755,7 +755,7 @@ static struct android_pmem_platform_data mdp_pmem_pdata = { .start = MSM_PMEM_MDP_BASE, .size = MSM_PMEM_MDP_SIZE, #ifdef CONFIG_MSM_KGSL - .allocator_type = PMEM_ALLOCATORTYPE_BITMAP, + .allocator_type = PMEM_ALLOCATORTYPE_ALLORNOTHING, #else .no_allocator = 0, #endif @@ -771,7 +771,7 @@ static struct android_pmem_platform_data android_pmem_adsp_pdata = { #else .no_allocator = 0, 
#endif - .cached = 1, + .cached = 0, }; @@ -784,7 +784,7 @@ static struct android_pmem_platform_data android_pmem_venc_pdata = { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; static struct platform_device android_pmem_mdp_device = { diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h old mode 100644 new mode 100755 index daf3db03..4b61ff8f --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -26,6 +26,7 @@ #define MSM_EBI1_BANK0_SIZE 0x1E7C0000 /* 488MB - 0x00040000 RAM CONSOLE*/ #endif + /* Don't change that */ #define MSM_SMI_BASE 0x00000000 #define MSM_SMI_SIZE 0x04000000 @@ -42,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_SIZE 0x02200000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 diff --git a/arch/arm/mach-msm/include/mach/ion.h b/arch/arm/mach-msm/include/mach/ion.h new file mode 100755 index 00000000..4d12249d --- /dev/null +++ b/arch/arm/mach-msm/include/mach/ion.h @@ -0,0 +1,23 @@ +/** + * + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MACH_ION_H_ +#define __MACH_ION_H_ + +enum ion_memory_types { + ION_EBI_TYPE, + ION_SMI_TYPE, +}; + +#endif diff --git a/arch/arm/mach-msm/pm.c b/arch/arm/mach-msm/pm.c index ed19e679..3155ec3f 100644 --- a/arch/arm/mach-msm/pm.c +++ b/arch/arm/mach-msm/pm.c @@ -777,6 +777,9 @@ static int msm_reboot_call(struct notifier_block *this, unsigned long code, void } else if (!strncmp(cmd, "oem-", 4)) { unsigned code = simple_strtoul(cmd + 4, 0, 16) & 0xff; restart_reason = 0x6f656d00 | code; + } else if (!strncmp(cmd, "S", 1)) { + unsigned code = simple_strtoul(cmd + 1, 0, 16) & 0x00ffffff; + restart_reason = 0x53000000 | code; } else if (!strcmp(cmd, "force-hard")) { restart_reason = 0x776655AA; } else { diff --git a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c old mode 100644 new mode 100755 index 69ce1804..b6162d89 --- a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c +++ b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c @@ -61,7 +61,7 @@ #define VDEC_GET_MAJOR_VERSION(version) (((version)&MAJOR_MASK)>>16) #define VDEC_GET_MINOR_VERSION(version) ((version)&MINOR_MASK) - +//#define DEBUG_TRACE_VDEC #ifdef DEBUG_TRACE_VDEC #define TRACE(fmt,x...) \ do { pr_debug("%s:%d " fmt, __func__, __LINE__, ##x); } while (0) @@ -69,6 +69,8 @@ #define TRACE(fmt,x...) do { } while (0) #endif +/* the version check will cause vdec hang up!!! */ +#define VERSION_CHECK 0 static DEFINE_MUTEX(idlecount_lock); static int idlecount; @@ -696,7 +698,7 @@ static long vdec_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; default: - pr_err("%s: invalid ioctl!\n", __func__); + pr_err("%s: invalid ioctl! 
cmd= %08x \n", __func__,cmd); ret = -EINVAL; break; } @@ -799,8 +801,9 @@ static int vdec_open(struct inode *inode, struct file *file) int i; struct vdec_msg_list *l; struct vdec_data *vd; +#if VERSION_CHECK struct dal_info version_info; - +#endif pr_info("q6vdec_open()\n"); mutex_lock(&vdec_ref_lock); if (ref_cnt >= MAX_SUPPORTED_INSTANCES) { @@ -845,6 +848,7 @@ static int vdec_open(struct inode *inode, struct file *file) ret = -EIO; goto vdec_open_err_handle_list; } +#if VERSION_CHECK ret = dal_call_f9(vd->vdec_handle, DAL_OP_INFO, &version_info, sizeof(struct dal_info)); @@ -859,12 +863,15 @@ static int vdec_open(struct inode *inode, struct file *file) pr_err("%s: driver version mismatch !\n", __func__); goto vdec_open_err_handle_version; } - +#endif vd->running = 1; prevent_sleep(); return 0; + +#if VERSION_CHECK vdec_open_err_handle_version: dal_detach(vd->vdec_handle); +#endif vdec_open_err_handle_list: { struct vdec_msg_list *l, *n; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca..d5873c6c 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,10 +17,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int read_expire = HZ / 4; /* max time before a read is submitted. */ static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one +static const int writes_starved = 4; /* max times reads can starve a write */ +static const int fifo_batch = 1; /* # of sequential requests treated as one by the above parameters. For throughput. */ struct deadline_data { @@ -362,7 +362,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = 0; dd->fifo_batch = fifo_batch; return dd; } diff --git a/build.sh b/build.sh new file mode 100755 index 00000000..69b27d68 --- /dev/null +++ b/build.sh @@ -0,0 +1,36 @@ +#!/bin/sh + +export KERNELBASEDIR=$PWD/../JB_Kernel_update-zip-files +#export TOOLCHAIN=$HOME/CodeSourcery/Sourcery_G++_Lite/bin/arm-none-eabi- +export TOOLCHAIN=$HOME/arm-2010q1/bin/arm-none-eabi- + +export KERNEL_FILE=HTCLEO-Kernel_2.6.32_tytung_jellybean + +rm arch/arm/boot/zImage +make htcleo_defconfig +make ARCH=arm CROSS_COMPILE=$TOOLCHAIN zImage -j8 && make ARCH=arm CROSS_COMPILE=$TOOLCHAIN modules -j8 + +if [ -f arch/arm/boot/zImage ]; then + +mkdir -p $KERNELBASEDIR/ +rm -rf $KERNELBASEDIR/boot/* +rm -rf $KERNELBASEDIR/system/lib/modules/* +mkdir -p $KERNELBASEDIR/boot +mkdir -p $KERNELBASEDIR/system/ +mkdir -p $KERNELBASEDIR/system/lib/ +mkdir -p $KERNELBASEDIR/system/lib/modules + +cp -a arch/arm/boot/zImage $KERNELBASEDIR/boot/zImage + +make ARCH=arm CROSS_COMPILE=$TOOLCHAIN INSTALL_MOD_PATH=$KERNELBASEDIR/system/lib/modules modules_install -j8 + +cd $KERNELBASEDIR/system/lib/modules +find -iname *.ko | xargs -i -t cp {} . +rm -rf $KERNELBASEDIR/system/lib/modules/lib +stat $KERNELBASEDIR/boot/zImage +cd ../../../ +zip -r ${KERNEL_FILE}_`date +"%Y%m%d_%H_%M"`.zip boot system META-INF work +else +echo "Kernel STUCK in BUILD! 
no zImage exist" +fi + diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c index afe8eb1c..8c064888 100644 --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,7 +22,7 @@ #include #include #include -#include /* for in_interrupt() */ +#include /* Lock states - can either be unlocked, held as an exclusive write lock or a * shared read lock @@ -32,7 +32,18 @@ #define _RDLOCK GENLOCK_RDLOCK #define _WRLOCK GENLOCK_WRLOCK +#define GENLOCK_LOG_ERR(fmt, args...) \ +pr_err("genlock: %s: " fmt, __func__, ##args) + +/* The genlock magic stored in the kernel private data is used to protect + * against the possibility of user space passing a valid fd to a + * non-genlock file for genlock_attach_lock() + */ +#define GENLOCK_MAGIC_OK 0xD2EAD10C +#define GENLOCK_MAGIC_BAD 0xD2EADBAD + struct genlock { + unsigned int magic; /* Magic for attach verification */ struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ wait_queue_head_t queue; /* Holding pen for processes pending lock */ @@ -49,12 +60,28 @@ struct genlock_handle { taken */ }; +/* + * Create a spinlock to protect against a race condition when a lock gets + * released while another process tries to attach it + */ + +static DEFINE_SPINLOCK(genlock_ref_lock); + static void genlock_destroy(struct kref *kref) { - struct genlock *lock = container_of(kref, struct genlock, - refcount); + struct genlock *lock = container_of(kref, struct genlock, + refcount); - kfree(lock); + /* + * Clear the private data for the file descriptor in case the fd is + * still active after the lock gets released + */ + + if (lock->file) + lock->file->private_data = NULL; + lock->magic = GENLOCK_MAGIC_BAD; + + kfree(lock); } /* @@ -64,6 +91,15 @@ static void genlock_destroy(struct kref *kref) static int genlock_release(struct inode *inodep, struct file *file) { + struct genlock *lock = file->private_data; + /* + * Clear the refrence back to this file structure to avoid + * somehow reusing the lock after the file has been destroyed + */ + + if (lock) + lock->file = NULL; + return 0; } @@ -81,18 +117,29 @@ static const struct file_operations genlock_fops = { struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; + void *ret; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } lock = kzalloc(sizeof(*lock), GFP_KERNEL); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for a lock\n"); return ERR_PTR(-ENOMEM); + } INIT_LIST_HEAD(&lock->active); init_waitqueue_head(&lock->queue); spin_lock_init(&lock->lock); + lock->magic = GENLOCK_MAGIC_OK; lock->state = _UNLOCKED; /* @@ -100,8 +147,13 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) * other processes */ - lock->file = anon_inode_getfile("genlock", &genlock_fops, - lock, O_RDWR); + ret = anon_inode_getfile("genlock", &genlock_fops, lock, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create lock inode\n"); + kfree(lock); + return ret; + } + lock->file = 
ret; /* Attach the new lock to the handle */ handle->lock = lock; @@ -120,8 +172,10 @@ static int genlock_get_fd(struct genlock *lock) { int ret; - if (!lock->file) + if (!lock->file) { + GENLOCK_LOG_ERR("No file attached to the lock\n"); return -EINVAL; + } ret = get_unused_fd_flags(0); if (ret < 0) @@ -143,24 +197,51 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) struct file *file; struct genlock *lock; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } file = fget(fd); - if (file == NULL) + if (file == NULL) { + GENLOCK_LOG_ERR("Bad file descriptor\n"); return ERR_PTR(-EBADF); + } + /* + * take a spinlock to avoid a race condition if the lock is + * released and then attached + */ + + spin_lock(&genlock_ref_lock); lock = file->private_data; fput(file); - if (lock == NULL) - return ERR_PTR(-EINVAL); + if (lock == NULL) { + GENLOCK_LOG_ERR("File descriptor is invalid\n"); + goto fail_invalid; + } + + if (lock->magic != GENLOCK_MAGIC_OK) { + GENLOCK_LOG_ERR("Magic is invalid - 0x%X\n", lock->magic); + goto fail_invalid; + } handle->lock = lock; kref_get(&lock->refcount); + spin_unlock(&genlock_ref_lock); return lock; + +fail_invalid: + spin_unlock(&genlock_ref_lock); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL(genlock_attach_lock); @@ -199,13 +280,16 @@ static int _genlock_unlock(struct genlock *lock, struct genlock_handle *handle) spin_lock_irqsave(&lock->lock, irqflags); - if (lock->state == _UNLOCKED) + if (lock->state == _UNLOCKED) { + GENLOCK_LOG_ERR("Trying to unlock an unlocked handle\n"); goto done; + } /* Make sure this handle is an owner of the lock */ - if (!handle_has_lock(lock, handle)) + if (!handle_has_lock(lock, handle)) { + GENLOCK_LOG_ERR("handle does not have lock attached to it\n"); goto done; - + } /* If the handle holds no more references to the lock then release it (maybe) */ @@ -228,7 +312,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, { unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); spin_lock_irqsave(&lock->lock, irqflags); @@ -247,12 +331,15 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, if (handle_has_lock(lock, handle)) { /* - * If the handle already holds the lock and the type matches, - * then just increment the active pointer. This allows the - * handle to do recursive locks + * If the handle already holds the lock and the lock type is + * a read lock then just increment the active pointer. This + * allows the handle to do recursive read locks. Recursive + * write locks are not allowed in order to support + * synchronization within a process using a single gralloc + * handle. */ - if (lock->state == op) { + if (lock->state == _RDLOCK && op == _RDLOCK) { handle->active++; goto done; } @@ -261,32 +348,46 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * If the handle holds a write lock then the owner can switch * to a read lock if they want. Do the transition atomically * then wake up any pending waiters in case they want a read - * lock too. + * lock too. In order to support synchronization within a + * process the caller must explicity request to convert the + * lock type with the GENLOCK_WRITE_TO_READ flag. 
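The write-to-read conversion rule described in the comment above is easiest to see from the caller's side. Below is a minimal in-kernel usage sketch, not part of the patch itself, built only from the symbols this file exports (genlock_get_handle, genlock_create_lock, genlock_dreadlock and the GENLOCK_* operations); the header path, the omitted teardown, and the 100 ms timeouts are illustrative assumptions.

/*
 * Minimal sketch of an in-kernel genlock user (illustrative only).
 * Header location is assumed; error unwinding and handle teardown
 * are omitted for brevity.
 */
#include <linux/err.h>
#include <linux/genlock.h>	/* assumed location of the genlock API */

static int genlock_usage_sketch(void)
{
	struct genlock_handle *handle;
	struct genlock *lock;
	int ret;

	handle = genlock_get_handle();
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	lock = genlock_create_lock(handle);
	if (IS_ERR(lock))
		return PTR_ERR(lock);

	/* Take the write lock, waiting up to 100 ms for it to come free */
	ret = genlock_dreadlock(handle, GENLOCK_WRLOCK, 0, 100);
	if (ret)
		return ret;

	/*
	 * Downgrade write -> read: with this patch the caller must ask
	 * for the conversion explicitly via GENLOCK_WRITE_TO_READ.
	 */
	ret = genlock_dreadlock(handle, GENLOCK_RDLOCK,
				GENLOCK_WRITE_TO_READ, 100);
	if (ret)
		return ret;

	/* Drop the lock; the handle itself lives until its file is destroyed */
	return genlock_dreadlock(handle, GENLOCK_UNLOCK, 0, 0);
}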
*/ - if (op == _RDLOCK && handle->active == 1) { - lock->state = _RDLOCK; - wake_up(&lock->queue); + if (flags & GENLOCK_WRITE_TO_READ) { + if (lock->state == _WRLOCK && op == _RDLOCK) { + lock->state = _RDLOCK; + wake_up(&lock->queue); + goto done; + } else { + GENLOCK_LOG_ERR("Invalid state to convert" + "write to read\n"); + ret = -EINVAL; + goto done; + } + } + } else { + + /* + * Check to ensure the caller has not attempted to convert a + * write to a read without holding the lock. + */ + + if (flags & GENLOCK_WRITE_TO_READ) { + GENLOCK_LOG_ERR("Handle must have lock to convert" + "write to read\n"); + ret = -EINVAL; goto done; } /* - * Otherwise the user tried to turn a read into a write, and we - * don't allow that. + * If we request a read and the lock is held by a read, then go + * ahead and share the lock */ - ret = -EINVAL; - goto done; + if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) + goto dolock; } - /* - * If we request a read and the lock is held by a read, then go - * ahead and share the lock - */ - - if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) - goto dolock; - /* Treat timeout 0 just like a NOBLOCK flag and return if the lock cannot be aquired without blocking */ @@ -295,15 +396,26 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - /* Wait while the lock remains in an incompatible state */ + /* + * Wait while the lock remains in an incompatible state + * state op wait + * ------------------- + * unlocked n/a no + * read read no + * read write yes + * write n/a yes + */ - while (lock->state != _UNLOCKED) { - unsigned int elapsed; + while ((lock->state == _RDLOCK && op == _WRLOCK) || + lock->state == _WRLOCK) { + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); elapsed = wait_event_interruptible_timeout(lock->queue, - lock->state == _UNLOCKED, ticks); + lock->state == _UNLOCKED || + (lock->state == _RDLOCK && op == _RDLOCK), + ticks); spin_lock_irqsave(&lock->lock, irqflags); @@ -312,7 +424,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } dolock: @@ -320,7 +432,7 @@ dolock: list_add_tail(&handle->entry, &lock->active); lock->state = op; - handle->active = 1; + handle->active++; done: spin_unlock_irqrestore(&lock->lock, irqflags); @@ -329,7 +441,7 @@ done: } /** - * genlock_lock - Acquire or release a lock + * genlock_lock - Acquire or release a lock (depreciated) * @handle - pointer to the genlock handle that is requesting the lock * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) * @flags - flags to control the operation @@ -341,11 +453,76 @@ done: int genlock_lock(struct genlock_handle *handle, int op, int flags, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; + unsigned long irqflags; + int ret = 0; - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } + + switch (op) { + case GENLOCK_UNLOCK: + ret = _genlock_unlock(lock, handle); + break; + case GENLOCK_RDLOCK: + spin_lock_irqsave(&lock->lock, irqflags); + if (handle_has_lock(lock, handle)) { + /* request the WRITE_TO_READ flag for compatibility */ + flags |= GENLOCK_WRITE_TO_READ; + } + spin_unlock_irqrestore(&lock->lock, irqflags); + /* fall through to take lock */ + case GENLOCK_WRLOCK: + ret = 
_genlock_lock(lock, handle, op, flags, timeout); + break; + default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); + ret = -EINVAL; + break; + } + + return ret; +} +EXPORT_SYMBOL(genlock_lock); + +/** + * genlock_dreadlock - Acquire or release a lock + * @handle - pointer to the genlock handle that is requesting the lock + * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) + * @flags - flags to control the operation + * @timeout - optional timeout to wait for the lock to come free + * + * Returns: 0 on success or error code on failure + */ + +int genlock_dreadlock(struct genlock_handle *handle, int op, int flags, + uint32_t timeout) +{ + struct genlock *lock; + + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); + return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } switch (op) { case GENLOCK_UNLOCK: @@ -356,13 +533,14 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, ret = _genlock_lock(lock, handle, op, flags, timeout); break; default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); ret = -EINVAL; break; } return ret; } -EXPORT_SYMBOL(genlock_lock); +EXPORT_SYMBOL(genlock_dreadlock); /** * genlock_wait - Wait for the lock to be released @@ -372,13 +550,22 @@ EXPORT_SYMBOL(genlock_lock); int genlock_wait(struct genlock_handle *handle, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } spin_lock_irqsave(&lock->lock, irqflags); @@ -393,7 +580,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) } while (lock->state != _UNLOCKED) { - unsigned int elapsed; + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); @@ -407,7 +594,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) break; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } done: @@ -415,12 +602,7 @@ done: return ret; } -/** - * genlock_release_lock - Release a lock attached to a handle - * @handle - Pointer to the handle holding the lock - */ - -void genlock_release_lock(struct genlock_handle *handle) +static void genlock_release_lock(struct genlock_handle *handle) { unsigned long flags; @@ -437,11 +619,12 @@ void genlock_release_lock(struct genlock_handle *handle) } spin_unlock_irqrestore(&handle->lock->lock, flags); + spin_lock(&genlock_ref_lock); kref_put(&handle->lock->refcount, genlock_destroy); + spin_unlock(&genlock_ref_lock); handle->lock = NULL; handle->active = 0; } -EXPORT_SYMBOL(genlock_release_lock); /* * Release function called when all references to a handle are released @@ -468,8 +651,10 @@ static const struct file_operations genlock_handle_fops = { static struct genlock_handle *_genlock_get_handle(void) { struct genlock_handle *handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) + if (handle == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for the handle\n"); return ERR_PTR(-ENOMEM); + } return handle; } @@ -482,12 +667,19 @@ static struct genlock_handle *_genlock_get_handle(void) struct genlock_handle *genlock_get_handle(void) { + void *ret; struct genlock_handle 
*handle = _genlock_get_handle(); if (IS_ERR(handle)) return handle; - handle->file = anon_inode_getfile("genlock-handle", + ret = anon_inode_getfile("genlock-handle", &genlock_handle_fops, handle, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create handle inode\n"); + kfree(handle); + return ret; + } + handle->file = ret; return handle; } @@ -531,6 +723,9 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, struct genlock *lock; int ret; + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + switch (cmd) { case GENLOCK_IOC_NEW: { lock = genlock_create_lock(handle); @@ -540,8 +735,11 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return 0; } case GENLOCK_IOC_EXPORT: { - if (handle->lock == NULL) + if (handle->lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock" + "attached\n"); return -EINVAL; + } ret = genlock_get_fd(handle->lock); if (ret < 0) @@ -574,6 +772,14 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_lock(handle, param.op, param.flags, param.timeout); } + case GENLOCK_IOC_DREADLOCK: { + if (copy_from_user(¶m, (void __user *) arg, + sizeof(param))) + return -EFAULT; + + return genlock_dreadlock(handle, param.op, param.flags, + param.timeout); + } case GENLOCK_IOC_WAIT: { if (copy_from_user(¶m, (void __user *) arg, sizeof(param))) @@ -582,10 +788,16 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_wait(handle, param.timeout); } case GENLOCK_IOC_RELEASE: { - genlock_release_lock(handle); - return 0; + /* + * Return error - this ioctl has been deprecated. + * Locks should only be released when the handle is + * destroyed + */ + GENLOCK_LOG_ERR("Deprecated RELEASE ioctl called\n"); + return -EINVAL; } default: + GENLOCK_LOG_ERR("Invalid ioctl\n"); return -EINVAL; } } diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile old mode 100644 new mode 100755 index f49e7164..8ef724e2 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -19,6 +19,7 @@ msm_adreno-y += \ adreno_drawctxt.o \ adreno_postmortem.o \ adreno_a2xx.o \ + adreno_a3xx.o \ adreno.o msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h old mode 100644 new mode 100755 index d859d61c..1c7be3e6 --- a/drivers/gpu/msm/a2xx_reg.h +++ b/drivers/gpu/msm/a2xx_reg.h @@ -140,24 +140,9 @@ union reg_rb_edram_info { struct rb_edram_info_t f; }; -#define RBBM_READ_ERROR_UNUSED0_SIZE 2 -#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 -#define RBBM_READ_ERROR_UNUSED1_SIZE 13 -#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 -#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 - -struct rbbm_read_error_t { - unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; - unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; - unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; - unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; - unsigned int read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; -}; - -union rbbm_read_error_u { - unsigned int val:32; - struct rbbm_read_error_t f; -}; +#define RBBM_READ_ERROR_ADDRESS_MASK 0x0001fffc +#define RBBM_READ_ERROR_REQUESTER (1<<30) +#define RBBM_READ_ERROR_ERROR (1<<31) #define CP_RB_CNTL_RB_BUFSZ_SIZE 6 #define CP_RB_CNTL_UNUSED0_SIZE 2 @@ -278,6 +263,7 @@ union reg_cp_rb_cntl { #define REG_CP_ME_CNTL 0x01F6 #define REG_CP_ME_RAM_DATA 0x01FA #define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_RAM_RADDR 0x01F9 #define REG_CP_ME_STATUS 0x01F7 #define REG_CP_PFP_UCODE_ADDR 0x00C0 #define 
REG_CP_PFP_UCODE_DATA 0x00C1 diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h new file mode 100755 index 00000000..84c83b8c --- /dev/null +++ b/drivers/gpu/msm/a3xx_reg.h @@ -0,0 +1,453 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3xx_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* Register definitions */ + +#define A3XX_RBBM_HW_VERSION 0x000 +#define A3XX_RBBM_HW_RELEASE 0x001 +#define A3XX_RBBM_HW_CONFIGURATION 0x002 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_AHB_FAULT 0x54D +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_PROTECT_REG_1 0x461 +#define A3XX_CP_PROTECT_REG_2 0x462 +#define A3XX_CP_PROTECT_REG_3 0x463 +#define A3XX_CP_PROTECT_REG_4 0x464 +#define A3XX_CP_PROTECT_REG_5 0x465 +#define A3XX_CP_PROTECT_REG_6 0x466 +#define A3XX_CP_PROTECT_REG_7 0x467 +#define A3XX_CP_PROTECT_REG_8 0x468 +#define A3XX_CP_PROTECT_REG_9 0x469 +#define A3XX_CP_PROTECT_REG_A 0x46A +#define A3XX_CP_PROTECT_REG_B 0x46B +#define A3XX_CP_PROTECT_REG_C 0x46C +#define A3XX_CP_PROTECT_REG_D 0x46D +#define A3XX_CP_PROTECT_REG_E 0x46E 
+#define A3XX_CP_PROTECT_REG_F 0x46F +#define A3XX_CP_SCRATCH_REG2 0x57A +#define A3XX_CP_SCRATCH_REG3 0x57B +#define A3XX_VSC_BIN_SIZE 0xC01 +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_CONFIG_0 0xC06 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_CONFIG_1 0xC09 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_CONFIG_2 0xC0C +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_CONFIG_3 0xC0F +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_CONFIG_4 0xC12 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_CONFIG_5 0xC15 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_CONFIG_6 0xC18 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_CONFIG_7 0xC1B +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_PC_VSTREAM_CONTROL 0x21E4 +#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA +#define A3XX_PC_PRIM_VTX_CNTL 
0x21EC +#define A3XX_PC_RESTART_INDEX 0x21ED +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_CONTROL_0 0x2240 +#define A3XX_VFD_INDEX_MIN 0x2242 +#define A3XX_VFD_FETCH_INSTR_0_0 0x2246 +#define A3XX_VFD_FETCH_INSTR_0_4 0x224E +#define A3XX_VFD_DECODE_INSTR_0 0x2266 +#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E +#define A3XX_VPC_ATTR 0x2280 +#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340 +#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342 +#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343 +#define A3XX_VBIF_FIXED_SORT_EN 0x300C +#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D +#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E + +/* Bit flags for RBBM_CTL */ +#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1) +#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (17 << 1) + +/* Various flags used by the context switch code */ + +#define SP_MULTI 0 +#define SP_BUFFER_MODE 1 +#define SP_TWO_VTX_QUADS 0 +#define SP_PIXEL_BASED 0 +#define SP_R8G8B8A8_UNORM 8 +#define SP_FOUR_PIX_QUADS 1 + +#define HLSQ_DIRECT 0 +#define HLSQ_BLOCK_ID_SP_VS 4 +#define HLSQ_SP_VS_INSTR 0 +#define HLSQ_SP_FS_INSTR 0 +#define HLSQ_BLOCK_ID_SP_FS 6 +#define HLSQ_TWO_PIX_QUADS 0 +#define HLSQ_TWO_VTX_QUADS 0 +#define HLSQ_BLOCK_ID_TP_TEX 2 +#define HLSQ_TP_TEX_SAMPLERS 0 +#define HLSQ_TP_TEX_MEMOBJ 1 +#define HLSQ_BLOCK_ID_TP_MIPMAP 3 +#define HLSQ_TP_MIPMAP_BASE 1 +#define HLSQ_FOUR_PIX_QUADS 1 + +#define RB_FACTOR_ONE 1 +#define RB_BLEND_OP_ADD 0 +#define RB_FACTOR_ZERO 0 +#define RB_DITHER_DISABLE 0 +#define RB_DITHER_ALWAYS 1 +#define RB_FRAG_NEVER 0 +#define RB_ENDIAN_NONE 0 +#define RB_R8G8B8A8_UNORM 8 +#define RB_RESOLVE_PASS 2 +#define RB_CLEAR_MODE_RESOLVE 1 +#define RB_TILINGMODE_LINEAR 0 +#define RB_REF_NEVER 0 +#define RB_STENCIL_KEEP 0 +#define RB_RENDERING_PASS 0 +#define RB_TILINGMODE_32X32 2 + +#define PC_DRAW_TRIANGLES 2 +#define PC_DI_PT_RECTLIST 8 +#define PC_DI_SRC_SEL_AUTO_INDEX 2 +#define PC_DI_INDEX_SIZE_16_BIT 0 +#define PC_DI_IGNORE_VISIBILITY 0 +#define PC_DI_PT_TRILIST 4 +#define PC_DI_SRC_SEL_IMMEDIATE 1 +#define PC_DI_INDEX_SIZE_32_BIT 1 + +#define UCHE_ENTIRE_CACHE 1 +#define UCHE_OP_INVALIDATE 1 + +/* + * The following are bit field shifts within some of the registers defined + * above. 
These are used in the context switch code in conjunction with the + * _SET macro + */ + +#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16 +#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12 +#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21 +#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19 +#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20 +#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17 +#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0 +#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0 +#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0 +#define GRAS_SC_CONTROL_RASTER_MODE 12 +#define GRAS_SC_CONTROL_RENDER_MODE 4 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0 +#define HLSQ_CTRL0REG_CHUNKDISABLE 26 +#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27 +#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6 +#define HLSQ_CTRL0REG_FSTHREADSIZE 4 +#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28 +#define HLSQ_CTRL0REG_RESERVED2 10 +#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29 +#define HLSQ_CTRL0REG_SPSHADERRESTART 9 +#define HLSQ_CTRL0REG_TPFULLUPDATE 30 +#define HLSQ_CTRL1REG_RESERVED1 9 +#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8 +#define HLSQ_CTRL1REG_VSTHREADSIZE 6 +#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26 +#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0 +#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12 +#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24 +#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24 +#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8 +#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5 +#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25 +#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0 +#define PC_DRAW_INITIATOR_PRIM_TYPE 0 +#define PC_DRAW_INITIATOR_SOURCE_SELECT 6 +#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9 +#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B +#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D +#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E +#define RB_COPYCONTROL_COPY_GMEM_BASE 14 +#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4 +#define RB_COPYDESTBASE_COPY_DEST_BASE 4 +#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14 +#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18 +#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2 +#define RB_COPYDESTINFO_COPY_DEST_TILE 0 +#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0 +#define RB_DEPTHCONTROL_Z_TEST_FUNC 4 +#define RB_MODECONTROL_RENDER_MODE 8 +#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15 +#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16 +#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21 +#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24 +#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16 +#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29 +#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5 +#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8 +#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0 +#define RB_MRTBUFBASE_COLOR_BUF_BASE 4 +#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17 +#define RB_MRTBUFINFO_COLOR_FORMAT 0 +#define RB_MRTBUFINFO_COLOR_TILE_MODE 6 +#define RB_MRTCONTROL_COMPONENT_ENABLE 24 +#define RB_MRTCONTROL_DITHER_MODE 12 +#define RB_MRTCONTROL_READ_DEST_ENABLE 3 +#define RB_MRTCONTROL_ROP_CODE 8 +#define RB_MSAACONTROL_MSAA_DISABLE 10 +#define RB_MSAACONTROL_SAMPLE_MASK 16 +#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24 +#define RB_RENDERCONTROL_BIN_WIDTH 4 +#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12 +#define RB_STENCILCONTROL_STENCIL_FAIL 11 +#define RB_STENCILCONTROL_STENCIL_FAIL_BF 23 +#define RB_STENCILCONTROL_STENCIL_FUNC 8 
+#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20 +#define RB_STENCILCONTROL_STENCIL_ZFAIL 17 +#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29 +#define RB_STENCILCONTROL_STENCIL_ZPASS 14 +#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26 +#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10 +#define SP_FSCTRLREG0_FSICACHEINVALID 2 +#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18 +#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1 +#define SP_FSCTRLREG0_FSLENGTH 24 +#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21 +#define SP_FSCTRLREG0_FSTHREADMODE 0 +#define SP_FSCTRLREG0_FSTHREADSIZE 20 +#define SP_FSCTRLREG0_PIXLODENABLE 22 +#define SP_FSCTRLREG1_FSCONSTLENGTH 0 +#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20 +#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24 +#define SP_FSMRTREG_REGID 0 +#define SP_FSOUTREG_PAD0 2 +#define SP_IMAGEOUTPUTREG_MRTFORMAT 0 +#define SP_IMAGEOUTPUTREG_PAD0 6 +#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16 +#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25 +#define SP_SHADERLENGTH_LEN 0 +#define SP_SPCTRLREG_CONSTMODE 18 +#define SP_SPCTRLREG_SLEEPMODE 20 +#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10 +#define SP_VSCTRLREG0_VSICACHEINVALID 2 +#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1 +#define SP_VSCTRLREG0_VSLENGTH 24 +#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21 +#define SP_VSCTRLREG0_VSTHREADMODE 0 +#define SP_VSCTRLREG0_VSTHREADSIZE 20 +#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24 +#define SP_VSOUTREG_COMPMASK0 9 +#define SP_VSPARAMREG_POSREGID 0 +#define SP_VSPARAMREG_PSIZEREGID 8 +#define SP_VSPARAMREG_TOTALVSOUTVAR 20 +#define SP_VSVPCDSTREG_OUTLOC0 0 +#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16 +#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8 +#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0 +#define UCHE_INVALIDATE1REG_OPCODE 0x1C +#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F +#define VFD_BASEADDR_BASEADDR 0 +#define VFD_CTRLREG0_PACKETSIZE 18 +#define VFD_CTRLREG0_STRMDECINSTRCNT 22 +#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27 +#define VFD_CTRLREG0_TOTALATTRTOVS 0 +#define VFD_CTRLREG1_MAXSTORAGE 0 +#define VFD_CTRLREG1_REGID4INST 24 +#define VFD_CTRLREG1_REGID4VTX 16 +#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4 +#define VFD_DECODEINSTRUCTIONS_FORMAT 6 +#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29 +#define VFD_DECODEINSTRUCTIONS_REGID 12 +#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24 +#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30 +#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0 +#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7 +#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0 +#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18 +#define VFD_FETCHINSTRUCTIONS_STEPRATE 24 +#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17 +#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8 +#define VFD_THREADINGTHRESHOLD_RESERVED6 4 +#define VPC_VPCATTR_LMSIZE 28 +#define VPC_VPCATTR_THRHDASSIGN 12 +#define VPC_VPCATTR_TOTALATTR 0 +#define VPC_VPCPACK_NUMFPNONPOSVAR 8 +#define VPC_VPCPACK_NUMNONPOSVSVAR 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT08 0 +#define VPC_VPCVARPSREPLMODE_COMPONENT09 2 +#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4 +#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6 +#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8 +#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10 +#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12 +#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14 +#define VPC_VPCVARPSREPLMODE_COMPONENT10 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT11 18 +#define VPC_VPCVARPSREPLMODE_COMPONENT12 20 +#define VPC_VPCVARPSREPLMODE_COMPONENT13 22 +#define VPC_VPCVARPSREPLMODE_COMPONENT14 24 +#define VPC_VPCVARPSREPLMODE_COMPONENT15 26 +#define VPC_VPCVARPSREPLMODE_COMPONENT16 28 
+#define VPC_VPCVARPSREPLMODE_COMPONENT17 30 + +#endif diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c old mode 100644 new mode 100755 index 61f14a4a..96e9c75c --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -72,6 +72,7 @@ | (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT)) static const struct kgsl_functable adreno_functable; +unsigned int kgsl_cff_dump_enable=0; static struct adreno_device device_3d0 = { .dev = { @@ -120,8 +121,11 @@ static struct adreno_device device_3d0 = { }, .pfp_fw = NULL, .pm4_fw = NULL, + .wait_timeout = 10000, /* in milliseconds */ + .ib_check_level = 0, }; + /* * This is the master list of all GPU cores that are supported by this * driver. @@ -135,50 +139,47 @@ static const struct { const char *pm4fw; const char *pfpfw; struct adreno_gpudev *gpudev; + unsigned int istore_size; + unsigned int pix_shader_start; + unsigned int instruction_size; /* Size of an instruction in dwords */ + unsigned int gmem_size; /* size of gmem for gpu*/ } adreno_gpulist[] = { { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, + { ADRENO_REV_A203, 0, 1, 1, ANY_ID, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A205, 0, 1, 0, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, - "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev }, + "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_512K }, /* * patchlevel 5 (8960v2) needs special pm4 firmware to work around * a hardware problem. 
*/ { ADRENO_REV_A225, 2, 2, 0, 5, - "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, + { ADRENO_REV_A225, 2, 2, 0, 6, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, - "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, + /* A3XX doesn't use the pix_shader_start */ + { ADRENO_REV_A305, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2, SZ_256K }, + /* A3XX doesn't use the pix_shader_start */ + { ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2, SZ_512K }, + }; -static void adreno_gmeminit(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = &adreno_dev->dev; - union reg_rb_edram_info rb_edram_info; - unsigned int gmem_size; - unsigned int edram_value = 0; - - /* make sure edram range is aligned to size */ - BUG_ON(adreno_dev->gmemspace.gpu_base & - (adreno_dev->gmemspace.sizebytes - 1)); - - /* get edram_size value equivalent */ - gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); - while (gmem_size >>= 1) - edram_value++; - - rb_edram_info.val = 0; - - rb_edram_info.f.edram_size = edram_value; - rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ - - /* must be aligned to size */ - rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); - - adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); -} - static irqreturn_t adreno_isr(int irq, void *data) { irqreturn_t result; @@ -268,10 +269,13 @@ static void adreno_setstate(struct kgsl_device *device, int sizedwords = 0; unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ - /* If possible, then set the state via the command stream to avoid - a CPU idle. Otherwise, use the default setstate which uses register - writes */ - if (adreno_dev->drawctxt_active) { + /* + * If possible, then set the state via the command stream to avoid + * a CPU idle. 
Otherwise, use the default setstate which uses register + * writes For CFF dump we must idle and use the registers so that it is + * easier to filter out the mmu accesses from the dump + */ + if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) { if (flags & KGSL_MMUFLAGS_PTUPDATE) { /* wait for graphics pipe to be idle */ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); @@ -419,6 +423,10 @@ adreno_identify_gpu(struct adreno_device *adreno_dev) adreno_dev->gpudev = adreno_gpulist[i].gpudev; adreno_dev->pfp_fwfile = adreno_gpulist[i].pfpfw; adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw; + adreno_dev->istore_size = adreno_gpulist[i].istore_size; + adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start; + adreno_dev->instruction_size = adreno_gpulist[i].instruction_size; + adreno_dev->gmemspace.sizebytes = adreno_gpulist[i].gmem_size; } static int __devinit @@ -434,8 +442,6 @@ adreno_probe(struct platform_device *pdev) adreno_dev->wait_timeout = 10000; /* default value in milliseconds */ - init_completion(&device->recovery_gate); - status = adreno_ringbuffer_init(device); if (status != 0) goto error; @@ -480,7 +486,6 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) { int status = -EINVAL; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int init_reftimestamp = 0x7fffffff; device->state = KGSL_STATE_INIT; device->requested_state = KGSL_STATE_NONE; @@ -508,80 +513,22 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) kgsl_mh_start(device); - if (kgsl_mmu_start(device)) + status = kgsl_mmu_start(device); + if (status) goto error_clk_off; - /*We need to make sure all blocks are powered up and clocked before - *issuing a soft reset. The overrides will then be turned off (set to 0) - */ - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); - - /* Only reset CP block if all blocks have previously been reset */ - if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || - !adreno_is_a22x(adreno_dev)) { - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0xFFFFFFFF); - device->flags |= KGSL_FLAGS_SOFT_RESET; - } else - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000001); - - /* The core is in an indeterminate state until the reset completes - * after 30ms. 
- */ - msleep(30); - - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); - - adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); - - if (adreno_is_a225(adreno_dev)) { - /* Enable large instruction store for A225 */ - adreno_regwrite(device, REG_SQ_FLOW_CONTROL, 0x18000000); - } - - adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); - adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); - - if (cpu_is_msm8960() || cpu_is_msm8930()) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); - - if (!adreno_is_a22x(adreno_dev)) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); - - kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - - kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - init_reftimestamp); - - adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); - - /* Make sure interrupts are disabled */ - - adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); - adreno_regwrite(device, REG_CP_INT_CNTL, 0); - adreno_regwrite(device, REG_SQ_INT_CNTL, 0); - - if (adreno_is_a22x(adreno_dev)) - adreno_dev->gmemspace.sizebytes = SZ_512K; - else - adreno_dev->gmemspace.sizebytes = SZ_256K; - adreno_gmeminit(adreno_dev); + /* Start the GPU */ + adreno_dev->gpudev->start(adreno_dev); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + device->ftbl->irqctrl(device, 1); status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram); - if (status != 0) - goto error_irq_off; - + if (status == 0) { mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); - return status; + return 0; + } -error_irq_off: kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); kgsl_mmu_stop(device); error_clk_off: @@ -618,12 +565,14 @@ adreno_recover_hang(struct kgsl_device *device) struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; unsigned int timestamp; unsigned int num_rb_contents; - unsigned int bad_context; unsigned int reftimestamp; unsigned int enable_ts; unsigned int soptimestamp; unsigned int eoptimestamp; - struct adreno_context *drawctxt; + unsigned int context_id; + struct kgsl_context *context; + struct adreno_context *adreno_context; + int next = 0; KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n"); rb_buffer = vmalloc(rb->buffer_desc.size); @@ -638,22 +587,35 @@ adreno_recover_hang(struct kgsl_device *device) ret = adreno_ringbuffer_extract(rb, rb_buffer, &num_rb_contents); if (ret) goto done; - timestamp = rb->timestamp; - KGSL_DRV_ERR(device, "Last issued timestamp: %x\n", timestamp); - kgsl_sharedmem_readl(&device->memstore, &bad_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, "Last context unknown id:%d\n", + context_id); + context_id = KGSL_MEMSTORE_GLOBAL; + } + + timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp); + kgsl_sharedmem_readl(&device->memstore, &reftimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); kgsl_sharedmem_readl(&device->memstore, &enable_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable)); kgsl_sharedmem_readl(&device->memstore, &soptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); 
+ KGSL_MEMSTORE_OFFSET(context_id, + soptimestamp)); kgsl_sharedmem_readl(&device->memstore, &eoptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); /* Make sure memory is synchronized before restarting the GPU */ mb(); KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", bad_context); + "Context id that caused a GPU hang: %d\n", context_id); /* restart device */ ret = adreno_stop(device); if (ret) @@ -664,20 +626,20 @@ adreno_recover_hang(struct kgsl_device *device) KGSL_DRV_ERR(device, "Device has been restarted after hang\n"); /* Restore timestamp states */ kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp), eoptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); if (num_rb_contents) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), + KGSL_MEMSTORE_OFFSET(context_id, ref_wait_ts), reftimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable), enable_ts); } /* Make sure all writes are posted before the GPU reads them */ @@ -685,16 +647,34 @@ adreno_recover_hang(struct kgsl_device *device) /* Mark the invalid context so no more commands are accepted from * that context */ - drawctxt = (struct adreno_context *) bad_context; + adreno_context = context->devctxt; KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", bad_context); + "Context that caused a GPU hang: %d\n", adreno_context->id); - drawctxt->flags |= CTXT_FLAGS_GPU_HANG; + adreno_context->flags |= CTXT_FLAGS_GPU_HANG; + + /* + * Set the reset status of all contexts to + * INNOCENT_CONTEXT_RESET_EXT except for the bad context + * since thats the guilty party + */ + while ((context = idr_get_next(&device->context_idr, &next))) { + if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT != + context->reset_status) { + if (context->id != context_id) + context->reset_status = + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; + else + context->reset_status = + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; + } + next = next + 1; + } /* Restore valid commands in ringbuffer */ adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents); - rb->timestamp = timestamp; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp; done: vfree(rb_buffer); return ret; @@ -788,7 +768,8 @@ static int adreno_getproperty(struct kgsl_device *device, shadowprop.size = device->memstore.size; /* GSL needs this to be set, even if it appears to be meaningless */ - shadowprop.flags = KGSL_FLAGS_INITIALIZED; + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; } if (copy_to_user(value, &shadowprop, sizeof(shadowprop))) { @@ -834,6 +815,12 @@ static int adreno_getproperty(struct kgsl_device *device, return status; } +static inline void adreno_poke(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr); +} + /* Caller must hold the device mutex. 
*/ int adreno_idle(struct kgsl_device *device, unsigned int timeout) { @@ -842,16 +829,32 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) unsigned int rbbm_status; unsigned long wait_timeout = msecs_to_jiffies(adreno_dev->wait_timeout); - unsigned long wait_time = jiffies + wait_timeout; + unsigned long wait_time; + unsigned long wait_time_part; + unsigned int msecs; + unsigned int msecs_first; + unsigned int msecs_part; - kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2, + kgsl_cffdump_regpoll(device->id, + adreno_dev->gpudev->reg_rbbm_status << 2, 0x00000000, 0x80000000); /* first, wait until the CP has consumed all the commands in * the ring buffer */ retry: if (rb->flags & KGSL_FLAGS_STARTED) { + msecs = adreno_dev->wait_timeout; + msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + wait_time = jiffies + wait_timeout; + wait_time_part = jiffies + msecs_to_jiffies(msecs_first); + adreno_poke(device); do { + if (time_after(jiffies, wait_time_part)) { + adreno_poke(device); + wait_time_part = jiffies + + msecs_to_jiffies(msecs_part); + } GSL_RB_GET_READPTR(rb, &rb->rptr); if (time_after(jiffies, wait_time)) { KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n", @@ -864,7 +867,8 @@ retry: /* now, wait for the GPU to finish its operations */ wait_time = jiffies + wait_timeout; while (time_before(jiffies, wait_time)) { - adreno_regread(device, REG_RBBM_STATUS, &rbbm_status); + adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status, + &rbbm_status); if (rbbm_status == 0x110) return 0; } @@ -918,58 +922,70 @@ static int adreno_suspend_context(struct kgsl_device *device) return status; } -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size) +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size) { - uint8_t *result = NULL; + struct kgsl_memdesc *result = NULL; struct kgsl_mem_entry *entry; - struct kgsl_process_private *priv; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer; + struct kgsl_context *context; + int next = 0; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->buffer_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size)) + return &ringbuffer->buffer_desc; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->memptrs_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr, size)) + return &ringbuffer->memptrs_desc; - if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&device->memstore, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size)) + return &device->memstore; - mutex_lock(&kgsl_driver.process_mutex); - list_for_each_entry(priv, &kgsl_driver.process_list, list) { - if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base)) - continue; - spin_lock(&priv->mem_lock); - entry = kgsl_sharedmem_find_region(priv, gpuaddr, - sizeof(unsigned int)); - if (entry) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); - spin_unlock(&priv->mem_lock); - mutex_unlock(&kgsl_driver.process_mutex); - return result; - } - spin_unlock(&priv->mem_lock); - } - mutex_unlock(&kgsl_driver.process_mutex); + entry = 
kgsl_get_mem_entry(pt_base, gpuaddr, size); - BUG_ON(!mutex_is_locked(&device->mutex)); - list_for_each_entry(entry, &device->memqueue, list) { - if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr)) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); + if (entry) + return &entry->memdesc; + + while (1) { + struct adreno_context *adreno_context = NULL; + context = idr_get_next(&device->context_idr, &next); + if (context == NULL) break; + + adreno_context = (struct adreno_context *)context->devctxt; + + if (kgsl_mmu_pt_equal(adreno_context->pagetable, pt_base)) { + struct kgsl_memdesc *desc; + + desc = &adreno_context->gpustate; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; } + desc = &adreno_context->context_gmem_shadow.gmemshadow; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; + } } - return result; + next = next + 1; + } + + return NULL; + +} + +uint8_t *adreno_convertaddr(struct kgsl_device *device, unsigned int pt_base, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_memdesc *memdesc; + + memdesc = adreno_find_region(device, pt_base, gpuaddr, size); + + return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL; } void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, @@ -1009,45 +1025,66 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, __raw_writel(value, reg); } +static unsigned int _get_context_id(struct kgsl_context *k_ctxt) +{ + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + + if (k_ctxt != NULL) { + struct adreno_context *a_ctxt = k_ctxt->devctxt; + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. 
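A short usage sketch for the helper being added here (hypothetical caller, not part of the patch): timestamp readers and waiters hand over whatever kgsl_context they hold and fall back to the ringbuffer-global slot when the context predates per-context timestamps:

	unsigned int id = _get_context_id(context); /* KGSL_MEMSTORE_GLOBAL when context is
						       NULL or lacks CTXT_FLAGS_PER_CONTEXT_TS */
	kgsl_sharedmem_readl(&device->memstore, &timestamp,
			KGSL_MEMSTORE_OFFSET(id, eoptimestamp));

This mirrors what adreno_readtimestamp() does further down.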
+ */ + if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = a_ctxt->id; + } + + return context_id; +} + static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, - unsigned int timestamp) + struct kgsl_context *context, unsigned int timestamp) { int status; unsigned int ref_ts, enableflag; + unsigned int context_id = _get_context_id(context); - status = kgsl_check_timestamp(device, timestamp); + status = kgsl_check_timestamp(device, context, timestamp); if (!status) { mutex_lock(&device->mutex); kgsl_sharedmem_readl(&device->memstore, &enableflag, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable)); mb(); if (enableflag) { kgsl_sharedmem_readl(&device->memstore, &ref_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); mb(); if (timestamp_cmp(ref_ts, timestamp) >= 0) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); wmb(); } } else { unsigned int cmds[2]; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); enableflag = 1; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), enableflag); wmb(); /* submit a dummy packet so that even if all * commands upto timestamp get executed we will still * get an interrupt */ cmds[0] = cp_type3_packet(CP_NOP, 1); cmds[1] = 0; - adreno_ringbuffer_issuecmds(device, 0, &cmds[0], 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + &cmds[0], 2); } mutex_unlock(&device->mutex); } @@ -1073,80 +1110,110 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, /* MUST be called with the device mutex held */ static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { long status = 0; uint io = 1; + static uint io_cnt; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int retries; + unsigned int msecs_first; + unsigned int msecs_part; + unsigned int ts_issued; + unsigned int context_id = _get_context_id(context); + + ts_issued = adreno_dev->ringbuffer.timestamp[context_id]; /* Don't wait forever, set a max value for now */ if (msecs == -1) msecs = adreno_dev->wait_timeout; - if (timestamp_cmp(timestamp, adreno_dev->ringbuffer.timestamp) > 0) { - KGSL_DRV_ERR(device, "Cannot wait for invalid ts: %x, " - "rb->timestamp: %x\n", - timestamp, adreno_dev->ringbuffer.timestamp); + if (timestamp_cmp(timestamp, ts_issued) > 0) { + KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, " + "last issued ts <%d:0x%x>\n", + context_id, timestamp, context_id, ts_issued); status = -EINVAL; goto done; } - if (!kgsl_check_timestamp(device, timestamp)) { - if (pwr->active_pwrlevel) { - int low_pwrlevel = pwr->num_pwrlevels - - KGSL_PWRLEVEL_LOW_OFFSET; - if (pwr->active_pwrlevel == low_pwrlevel) - io = 0; + + /* Keep the first timeout as 100msecs before rewriting + * the WPTR. Less visible impact if the WPTR has not + * been updated properly. + */ + msecs_first = (msecs <= 100) ? 
((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + for (retries = 0; retries < 5; retries++) { + if (kgsl_check_timestamp(device, context, timestamp)) { + /* if the timestamp happens while we're not + * waiting, there's a chance that an interrupt + * will not be generated and thus the timestamp + * work needs to be queued. + */ + queue_work(device->work_queue, &device->ts_expired_ws); + status = 0; + goto done; } + adreno_poke(device); +// the QSD8X50 don't support io_fraction ?? // SecureCRT 2012-06-20 +// io_cnt = (io_cnt + 1) % 100; +// if (io_cnt < +// pwr->pwrlevels[pwr->active_pwrlevel].o_fraction) +// io = 0; mutex_unlock(&device->mutex); - /* We need to make sure that the process is placed in wait-q - * before its condition is called */ + /* We need to make sure that the process is + * placed in wait-q before its condition is called + */ status = kgsl_wait_event_interruptible_timeout( device->wait_queue, kgsl_check_interrupt_timestamp(device, - timestamp), - msecs_to_jiffies(msecs), io); + context, timestamp), + msecs_to_jiffies(retries ? + msecs_part : msecs_first), io); mutex_lock(&device->mutex); - if (status > 0) - status = 0; - else if (status == 0) { - if (!kgsl_check_timestamp(device, timestamp)) { + if (status > 0) { + /*completed before the wait finished */ + status = 0; + goto done; + } else if (status < 0) { + /*an error occurred*/ + goto done; + } + /*this wait timed out*/ + } status = -ETIMEDOUT; KGSL_DRV_ERR(device, - "Device hang detected while waiting " - "for timestamp: %x, last " - "submitted(rb->timestamp): %x, wptr: " - "%x\n", timestamp, - adreno_dev->ringbuffer.timestamp, + "Device hang detected while waiting for timestamp: " + "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, " + "wptr: 0x%x\n", + context_id, timestamp, context_id, ts_issued, adreno_dev->ringbuffer.wptr); if (!adreno_dump_and_recover(device)) { /* wait for idle after recovery as the * timestamp that this process wanted * to wait on may be invalid */ - if (!adreno_idle(device, - KGSL_TIMEOUT_DEFAULT)) - status = 0; - } - } - } + if (!adreno_idle(device, KGSL_TIMEOUT_DEFAULT)) + status = 0; } - done: return (int)status; } static unsigned int adreno_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { unsigned int timestamp = 0; + unsigned int context_id = _get_context_id(context); if (type == KGSL_TIMESTAMP_CONSUMED) adreno_regread(device, REG_CP_TIMESTAMP, ×tamp); else if (type == KGSL_TIMESTAMP_RETIRED) kgsl_sharedmem_readl(&device->memstore, ×tamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); rmb(); return timestamp; @@ -1179,7 +1246,7 @@ static long adreno_ioctl(struct kgsl_device_private *dev_priv, default: KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - result = -EINVAL; + result = -ENOIOCTLCMD; break; } return result; @@ -1195,44 +1262,29 @@ static inline s64 adreno_ticks_to_us(u32 ticks, u32 gpu_freq) static void adreno_power_stats(struct kgsl_device *device, struct kgsl_power_stats *stats) { - unsigned int reg; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int cycles; + + /* Get the busy cycles counted since the counter was last reset */ + /* Calling this function also resets and restarts the counter */ + + cycles = adreno_dev->gpudev->busy_cycles(adreno_dev); /* In order to calculate idle you have to have run the algorithm * * at least 
once to get a start time. */ if (pwr->time != 0) { - s64 tmp; - /* Stop the performance moniter and read the current * - * busy cycles. */ - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_FREEZE); - adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, ®); - tmp = ktime_to_us(ktime_get()); + s64 tmp = ktime_to_us(ktime_get()); stats->total_time = tmp - pwr->time; pwr->time = tmp; - stats->busy_time = adreno_ticks_to_us(reg, device->pwrctrl. + stats->busy_time = adreno_ticks_to_us(cycles, device->pwrctrl. pwrlevels[device->pwrctrl.active_pwrlevel]. gpu_freq); - - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_RESET); } else { stats->total_time = 0; stats->busy_time = 0; pwr->time = ktime_to_us(ktime_get()); } - - /* re-enable the performance moniters */ - adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); - adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); } void adreno_irqctrl(struct kgsl_device *device, int state) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h old mode 100644 new mode 100755 index 51ee31a5..929b5f08 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -24,13 +24,37 @@ KGSL_CONTAINER_OF(device, struct adreno_device, dev) /* Flags to control command packet settings */ +#define KGSL_CMD_FLAGS_NONE 0x00000000 #define KGSL_CMD_FLAGS_PMODE 0x00000001 #define KGSL_CMD_FLAGS_NO_TS_CMP 0x00000002 #define KGSL_CMD_FLAGS_NOT_KERNEL_CMD 0x00000004 /* Command identifiers */ -#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0xDEADBEEF -#define KGSL_CMD_IDENTIFIER 0xFEEDFACE +#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define KGSL_CMD_IDENTIFIER 0x2EEDFACE +#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE +#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD + +#ifdef CONFIG_MSM_SCM +#define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) +#else +#define ADRENO_DEFAULT_PWRSCALE_POLICY NULL +#endif + +#define ADRENO_ISTORE_START 0x5000 /* Istore offset */ + +enum adreno_gpurev { + ADRENO_REV_UNKNOWN = 0, + ADRENO_REV_A200 = 200, + ADRENO_REV_A203 = 203, + ADRENO_REV_A205 = 205, + ADRENO_REV_A220 = 220, + ADRENO_REV_A225 = 225, + ADRENO_REV_A305 = 305, + ADRENO_REV_A320 = 320, +}; + +struct adreno_gpudev; #ifdef CONFIG_MSM_SCM #define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) @@ -64,20 +88,34 @@ struct adreno_device { unsigned int mharb; struct adreno_gpudev *gpudev; unsigned int wait_timeout; + unsigned int istore_size; + unsigned int pix_shader_start; + unsigned int instruction_size; + unsigned int ib_check_level; }; struct adreno_gpudev { - int (*ctxt_gpustate_shadow)(struct adreno_device *, - struct adreno_context *); - int (*ctxt_gmem_shadow)(struct adreno_device *, - struct adreno_context *); + /* + * These registers are in a different location on A3XX, so define + * them in the structure and use them as variables. 
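In practice this means the common code stops hard-coding A2XX register names and goes through the per-GPU table instead; the idle loop earlier in this patch already reads the status register that way:

	adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status,
			&rbbm_status);

so the same idle, wait and recovery paths work whether adreno_a2xx_gpudev or adreno_a3xx_gpudev is bound to the device.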
+ */ + unsigned int reg_rbbm_status; + unsigned int reg_cp_pfp_ucode_data; + unsigned int reg_cp_pfp_ucode_addr; + + /* GPU specific function hooks */ + int (*ctxt_create)(struct adreno_device *, struct adreno_context *); void (*ctxt_save)(struct adreno_device *, struct adreno_context *); void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); irqreturn_t (*irq_handler)(struct adreno_device *); void (*irq_control)(struct adreno_device *, int); + void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + void (*start)(struct adreno_device *); + unsigned int (*busy_cycles)(struct adreno_device *); }; extern struct adreno_gpudev adreno_a2xx_gpudev; +extern struct adreno_gpudev adreno_a3xx_gpudev; int adreno_idle(struct kgsl_device *device, unsigned int timeout); void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, @@ -85,23 +123,32 @@ void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size); +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size); + +uint8_t *adreno_convertaddr(struct kgsl_device *device, + unsigned int pt_base, unsigned int gpuaddr, unsigned int size); static inline int adreno_is_a200(struct adreno_device *adreno_dev) { return (adreno_dev->gpurev == ADRENO_REV_A200); } +static inline int adreno_is_a203(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev == ADRENO_REV_A203); +} + static inline int adreno_is_a205(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200); + return (adreno_dev->gpurev == ADRENO_REV_A205); } static inline int adreno_is_a20x(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200 || - adreno_dev->gpurev == ADRENO_REV_A205); + return (adreno_dev->gpurev <= 209); } static inline int adreno_is_a220(struct adreno_device *adreno_dev) @@ -122,7 +169,36 @@ static inline int adreno_is_a22x(struct adreno_device *adreno_dev) static inline int adreno_is_a2xx(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev <= ADRENO_REV_A225); + return (adreno_dev->gpurev <= 299); +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev >= 300); +} + +/** + * adreno_encode_istore_size - encode istore size in CP format + * @adreno_dev - The 3D device. + * + * Encode the istore size into the format expected that the + * CP_SET_SHADER_BASES and CP_ME_INIT commands: + * bits 31:29 - istore size as encoded by this function + * bits 27:16 - vertex shader start offset in instructions + * bits 11:0 - pixel shader start offset in instructions. + */ +static inline int adreno_encode_istore_size(struct adreno_device *adreno_dev) +{ + unsigned int size; + /* in a225 the CP microcode multiplies the encoded + * value by 3 while decoding. + */ + if (adreno_is_a225(adreno_dev)) + size = adreno_dev->istore_size/3; + else + size = adreno_dev->istore_size; + + return (ilog2(size) - 5) << 29; } diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index 064b05e9..dc43062e 100644 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. 
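A worked example for adreno_encode_istore_size() above: the old A2XX code assumed a fixed 512-instruction store (SHADER_INSTRUCT_LOG2 = 9) and so the gmem save/restore builders hard-coded (9 - 5) << 29 = 0x80000000 together with a pixel shader start of 0x180 in their CP_SET_SHADER_BASES packets. With this change the same value falls out of the probed istore size:

	/* e.g. a core whose istore holds 512 instructions */
	adreno_dev->istore_size = 512;
	adreno_encode_istore_size(adreno_dev); /* (ilog2(512) - 5) << 29 == 4 << 29 == 0x80000000 */

while an A225, whose CP microcode multiplies the encoded field by 3 during decode, divides istore_size by 3 before taking the log.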
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,6 +11,8 @@ * */ +#include + #include "kgsl.h" #include "kgsl_sharedmem.h" #include "kgsl_cffdump.h" @@ -72,10 +74,6 @@ #define TEX_CONSTANTS (32*6) /* DWORDS */ #define BOOL_CONSTANTS 8 /* DWORDS */ #define LOOP_CONSTANTS 56 /* DWORDS */ -#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ - -/* 96-bit instructions */ -#define SHADER_INSTRUCT (1<istore_size * + (adreno_dev->instruction_size * sizeof(unsigned int)); +} + +static inline int _context_size(struct adreno_device *adreno_dev) +{ + return SHADER_OFFSET + 3*_shader_shadow_size(adreno_dev); +} /* A scratchpad used to build commands during context create */ @@ -546,6 +552,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, unsigned int addr = shadow->gmemshadow.gpuaddr; unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -554,6 +561,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -601,7 +609,8 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00003F00; *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* load the patched vertex shader stream */ cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); @@ -755,6 +764,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, unsigned int *cmds = shadow->gmem_restore_commands; unsigned int *start = cmds; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -762,6 +772,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -802,7 +813,8 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* Load the patched fragment shader stream */ cmds = @@ -1089,7 +1101,8 @@ static void build_regrestore_cmds(struct adreno_device *adreno_dev, } static void -build_shader_save_restore_cmds(struct adreno_context *drawctxt) +build_shader_save_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) { unsigned int *cmd = tmp_ctx.cmd; unsigned int *save, *restore, *fixup; @@ -1099,8 +1112,10 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) /* compute vertex, pixel and shared instruction shadow GPU addresses */ tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; - tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; - tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + tmp_ctx.shader_pixel = 
tmp_ctx.shader_vertex + + _shader_shadow_size(adreno_dev); + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + + _shader_shadow_size(adreno_dev); /* restore shader partitioning and instructions */ @@ -1156,8 +1171,8 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) *cmd++ = REG_SCRATCH_REG2; /* AND off invalid bits. */ *cmd++ = 0x0FFF0FFF; - /* OR in instruction memory size */ - *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + /* OR in instruction memory size. */ + *cmd++ = adreno_encode_istore_size(adreno_dev); /* write the computed value to the SET_SHADER_BASES data field */ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); @@ -1219,45 +1234,22 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { - int result; - - /* Allocate vmalloc memory to store the gpustate */ - result = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); - - if (result) - return result; - drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; - /* Blank out h/w register, constant, and command buffer shadows. */ - kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); - - /* set-up command and vertex buffer pointers */ - tmp_ctx.cmd = tmp_ctx.start - = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); - /* build indirect command buffers to save & restore regs/constants */ - adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); build_regrestore_cmds(adreno_dev, drawctxt); build_regsave_cmds(adreno_dev, drawctxt); - build_shader_save_restore_cmds(drawctxt); + build_shader_save_restore_cmds(adreno_dev, drawctxt); - kgsl_cache_range_op(&drawctxt->gpustate, - KGSL_CACHE_OP_FLUSH); - - kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, - drawctxt->gpustate.gpuaddr, - drawctxt->gpustate.size, false); return 0; } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { int result; @@ -1272,8 +1264,8 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, if (result) return result; - /* we've allocated the shadow, when swapped out, GMEM must be saved. */ - drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + /* set the gmem shadow flag for the context */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW; /* blank out gmem shadow. */ kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, @@ -1284,6 +1276,7 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, &tmp_ctx.cmd); /* build TP0_CHICKEN register restore command buffer */ + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); /* build indirect command buffers to save & restore gmem */ @@ -1309,7 +1302,61 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, return 0; } -static void a2xx_ctxt_save(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more then just storage for + * the gpustate. 
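The allocation described here is sized by _context_size() from earlier in this file: the register/constant shadows and command buffer up to SHADER_OFFSET, followed by three instruction-store shadows whose size now tracks the probed istore instead of a fixed constant, roughly:

	/* sketch of the per-context gpustate buffer on A2XX */
	size = SHADER_OFFSET                        /* regs, constants, C&V commands   */
	     + 3 * _shader_shadow_size(adreno_dev); /* vertex + pixel + shared shadows */

which matches how build_shader_save_restore_cmds() above places shader_vertex, shader_pixel and shader_shared back to back.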
This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, _context_size(adreno_dev)); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, + _context_size(adreno_dev)); + + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) { + ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + } + + /* Flush and sync the gpustate memory */ + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a2xx_drawctxt_save(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1321,25 +1368,28 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, KGSL_CTXT_WARN(device, "Current active context has caused gpu hang\n"); - KGSL_CTXT_INFO(device, - "active context flags %08x\n", context->flags); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_save, 3); if (context->flags & CTXT_FLAGS_SHADER_SAVE) { /* save shader partitioning and instructions. */ - adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); - /* fixup shader partitioning parameter for + /* + * fixup shader partitioning parameter for * SET_SHADER_BASES. 
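For reference, a compact summary of what a2xx_drawctxt_create() above decides from the context flags (summary only, no new behaviour):

	/* !CTXT_FLAGS_PREAMBLE    -> build register/shader shadows and set
	 *                            CTXT_FLAGS_SHADER_SAVE
	 * !CTXT_FLAGS_NOGMEMALLOC -> build the GMEM shadow
	 */

Preamble contexts skip the register save/restore and chicken-restore commands in the save and restore paths below, since they are expected to re-establish their own state from a preamble IB.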
*/ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->shader_fixup, 3); context->flags |= CTXT_FLAGS_SHADER_RESTORE; } + } if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { @@ -1350,14 +1400,16 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, context->context_gmem_shadow.gmem_save, 3); /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags |= CTXT_FLAGS_GMEM_RESTORE; } } -static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, +static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1375,9 +1427,9 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context); - cmds[4] = (unsigned int) context; - adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); #ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP @@ -1393,27 +1445,34 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); /* restore shader instructions & partitioning. 
*/ if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); } + } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; - adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + cmds, 2); } } @@ -1492,11 +1551,18 @@ static void a2xx_cp_intrcallback(struct kgsl_device *device) if (status & CP_INT_CNTL__RB_INT_MASK) { /* signal intr completion event */ - unsigned int enableflag = 0; + unsigned int context_id; + kgsl_sharedmem_readl(&device->memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { kgsl_sharedmem_writel(&rb->device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); + device->last_expired_ctxt_id = context_id; wmb(); + } KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); } @@ -1532,21 +1598,33 @@ static void a2xx_rbbm_intrcallback(struct kgsl_device *device) { unsigned int status = 0; unsigned int rderr = 0; + unsigned int addr = 0; + const char *source; adreno_regread(device, REG_RBBM_INT_STATUS, &status); if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { - union rbbm_read_error_u rerr; adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); - rerr.val = rderr; - if (rerr.f.read_address == REG_CP_INT_STATUS && - rerr.f.read_error && - rerr.f.read_requester) + source = (rderr & RBBM_READ_ERROR_REQUESTER) + ? "host" : "cp"; + /* convert to dword address */ + addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2; + + /* + * Log CP_INT_STATUS interrupts from the CP at a + * lower level because they can happen frequently + * and are worked around in a2xx_irq_handler. 
+ */ + if (addr == REG_CP_INT_STATUS && + rderr & RBBM_READ_ERROR_ERROR && + rderr & RBBM_READ_ERROR_REQUESTER) KGSL_DRV_WARN(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); else KGSL_DRV_CRIT(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); } status &= RBBM_INT_MASK; @@ -1597,11 +1675,195 @@ static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) wmb(); } +static void a2xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + + /* ME_INIT */ + cmds = adreno_ringbuffer_allocspace(rb, 19); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18)); + /* All fields present (bits 9:0) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); + /* Disable/Enable Real-Time Stream processing (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); + + /* Instruction memory size: */ + GSL_RB_WRITE(cmds, cmds_gpu, + (adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start)); + /* Maximum Contexts */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + /* Write Confirm Interval and The CP will wait the + * wait_interval * 16 clocks between polling */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + /* NQ and External Memory Swap */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode error checking */ + GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); + /* Disable header dumping and Header dump address */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Header dump size */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_FREEZE); + + /* Get the value */ + adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &val); + + /* Reset the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_RESET); + + /* Re-Enable the performance monitors */ + adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); + adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); + + return val; +} + +static void a2xx_gmeminit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + union reg_rb_edram_info rb_edram_info; + unsigned int gmem_size; + unsigned int edram_value = 0; + + /* make sure edram 
range is aligned to size */ + BUG_ON(adreno_dev->gmemspace.gpu_base & + (adreno_dev->gmemspace.sizebytes - 1)); + + /* get edram_size value equivalent */ + gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); + while (gmem_size >>= 1) + edram_value++; + + rb_edram_info.val = 0; + + rb_edram_info.f.edram_size = edram_value; + rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ + + /* must be aligned to size */ + rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); + + adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); +} + +static void a2xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* + * We need to make sure all blocks are powered up and clocked + * before issuing a soft reset. The overrides will then be + * turned off (set to 0) + */ + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); + + /* + * Only reset CP block if all blocks have previously been + * reset + */ + if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || + !adreno_is_a22x(adreno_dev)) { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0xFFFFFFFF); + device->flags |= KGSL_FLAGS_SOFT_RESET; + } else { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0x00000001); + } + /* + * The core is in an indeterminate state until the reset + * completes after 30ms. + */ + msleep(30); + + adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); + + if (adreno_is_a225(adreno_dev)) { + /* Enable large instruction store for A225 */ + adreno_regwrite(device, REG_SQ_FLOW_CONTROL, + 0x18000000); + } + + adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); + + adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); + adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); + +// if (cpu_is_msm8960() || cpu_is_msm8930()) + if(0) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); + + if (!adreno_is_a22x(adreno_dev)) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); + + adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); + + /* Make sure interrupts are disabled */ + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, REG_SQ_INT_CNTL, 0); + + a2xx_gmeminit(adreno_dev); +} + +/* Defined in adreno_a2xx_snapshot.c */ +void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, + int *remain, int hang); + struct adreno_gpudev adreno_a2xx_gpudev = { - .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, - .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, - .ctxt_save = a2xx_ctxt_save, - .ctxt_restore = a2xx_ctxt_restore, + .reg_rbbm_status = REG_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = REG_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = REG_CP_PFP_UCODE_DATA, + + .ctxt_create = a2xx_drawctxt_create, + .ctxt_save = a2xx_drawctxt_save, + .ctxt_restore = a2xx_drawctxt_restore, .irq_handler = a2xx_irq_handler, .irq_control = a2xx_irq_control, + .rb_init = a2xx_rb_init, + .busy_cycles = a2xx_busy_cycles, + .start = a2xx_start, }; diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100755 index 00000000..507ad02e --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,2555 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
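Stepping back to a2xx_gmeminit() above, the EDRAM (GMEM) size is programmed as a power-of-two count of 16 KB blocks and the range field takes the base in the same units, which is why the BUG_ON() insists the base is size-aligned. A worked example:

	/* e.g. a 256 KB GMEM aperture */
	gmem_size = SZ_256K >> 14;	/* 16 blocks of 16 KB */
	/* the while loop turns that into edram_size = 4, i.e. log2(16) */
	rb_edram_info.f.edram_range = adreno_dev->gmemspace.gpu_base >> 14;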
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" + +/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem + * functions. + */ + +#define _SET(_shift, _val) ((_val) << (_shift)) + +/* + **************************************************************************** + * + * Context state shadow structure: + * + * +---------------------+------------+-------------+---------------------+---+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex| + * +---------------------+------------+-------------+---------------------+---+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 5K - Command and Vertex Buffers + * 8K - Shader Instruction Shadow + * ~6K - Texture Constant Shadow + * + * + *************************************************************************** + */ + +/* Sizes of all sections in state shadow memory */ +#define ALU_SHADOW_SIZE (8*1024) /* 8KB */ +#define REG_SHADOW_SIZE (4*1024) /* 4KB */ +#define CMD_BUFFER_SIZE (5*1024) /* 5KB */ +#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */ +#define TEX_SIZE_MIPMAP 1936 /* bytes */ +#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */ +#define TEX_SHADOW_SIZE \ + ((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \ + TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */ +#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */ + +/* Total context size, excluding GMEM shadow */ +#define CONTEXT_SIZE \ + (ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \ + CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \ + TEX_SHADOW_SIZE) + +/* Offsets to different sections in context shadow memory */ +#define REG_OFFSET ALU_SHADOW_SIZE +#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE) +#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE) +#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE) +#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET +#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) +#define FS_TEX_OFFSET_MEM_OBJECTS \ + (VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ) +#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) + +/* The offset for fragment shader data in HLSQ context */ +#define SSIZE (16*1024) + +#define HLSQ_SAMPLER_OFFSET 0x000 +#define HLSQ_MEMOBJ_OFFSET 0x400 +#define HLSQ_MIPMAP_OFFSET 0x800 + +#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM +/* Use shadow RAM */ +#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2) +#else +/* Use working RAM */ +#define HLSQ_SHADOW_BASE 0x10000 +#endif + +#define REG_TO_MEM_LOOP_COUNT_SHIFT 15 + +#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \ + vis_cull_mode) \ + (((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \ + ((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \ + ((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \ + ((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \ + ((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) 
| \ + (1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE)) + +/* + * List of context registers (starting from dword offset 0x2000). + * Each line contains start and end of a range of registers. + */ +static const unsigned int context_register_ranges[] = { + A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL, + A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ, + A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE, + A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE, + A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET, + A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL, + A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL, + A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR, + A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR, + A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3, + A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO, + A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL, + A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL, + A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, + A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX, + A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, + A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG, + A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG, + A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, + A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG, + A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD, + A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG, + A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7, + A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG, + A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG, + A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, + A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG, + A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3, + A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG, + A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1, +}; + +/* Global registers that need to be saved separately */ +static const unsigned int global_registers[] = { + A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0, + A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0, + A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1, + A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1, + A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2, + A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2, + A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3, + A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3, + A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4, + A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4, + A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5, + A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5, + A3XX_VSC_BIN_SIZE, + A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1, + A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3, + A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5, + A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7, + A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_SIZE_ADDRESS +}; + +#define 
GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers) + +/* A scratchpad used to build commands during context create */ +static struct tmp_ctx { + unsigned int *cmd; /* Next available dword in C&V buffer */ + + /* Addresses in comamnd buffer where registers are saved */ + uint32_t reg_values[GLOBAL_REGISTER_COUNT]; + uint32_t gmem_base; /* Base GPU address of GMEM */ +} tmp_ctx; + +#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC +/* + * Function for executing dest = ( (reg & and) ROL rol ) | or + */ +static unsigned int *rmw_regtomem(unsigned int *cmd, + unsigned int reg, unsigned int and, + unsigned int rol, unsigned int or, + unsigned int dest) +{ + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = reg; /* OR address */ + + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2; + *cmd++ = and; /* AND value */ + *cmd++ = or; /* OR value */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = dest; + + return cmd; +} +#endif + +static void build_regconstantsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int i; + + drawctxt->constant_save_commands[0].hostptr = cmd; + drawctxt->constant_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + cmd++; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* + * Context registers are already shadowed; just need to + * disable shadowing to prevent corruption. + */ + + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + +#else + /* + * Make sure the HW context has the correct register values before + * reading them. + */ + + /* Write context registers into shadow */ + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + unsigned int start = context_register_ranges[i * 2]; + unsigned int end = context_register_ranges[i * 2 + 1]; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + start; + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) + & 0xFFFFE000) + (start - 0x2000) * 4; + } +#endif + + /* Need to handle some of the global registers separately */ + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = global_registers[i]; + *cmd++ = tmp_ctx.reg_values[i]; + } + + /* Save vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[1].hostptr = cmd; + drawctxt->constant_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000) / 4 + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
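All of the save packets in this function share the same CP_REG_TO_MEM encoding: the first data word packs the dword count above REG_TO_MEM_LOOP_COUNT_SHIFT (bit 15) with the source register or HLSQ shadow offset in the low bits, and the second word is the destination GPU address. For the constant saves that count depends on the shaders currently bound, so the word is written as 0 here and patched in by the context-switch fixup; for the fixed-size texture shadows further down it is computed directly, for example:

	/* VS texture memory objects: 896 bytes = 224 dwords out of the HLSQ shadow */
	*cmd++ = ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);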
+ */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + /* ALU constant shadow base */ + *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc; + + /* Save fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[2].hostptr = cmd; + drawctxt->constant_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4 + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + + /* + From fixup: + + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + dst = base + offset + Because of the base alignment we can use + dst = base | offset + */ + *cmd++ = 0; /* dst */ + + /* Save VS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save VS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save VS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Save FS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save FS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save FS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* Copy GMEM contents to system memory shadow. 
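The A3XX copy works the same way as the A2XX one: the RB is put into a resolve pass (RB_MODECONTROL_RENDER_MODE set to RB_RESOLVE_PASS), RB_COPY_CONTROL/DEST are pointed at GMEM and at the shadow buffer, and a screen-aligned quad is drawn from shadow->quad_vertices so the whole of GMEM is streamed out. The shifts used below imply the units (a sketch with illustrative names, assuming RGBA8 at 4 bytes per pixel):

	copy_gmem_base  = tmp_ctx.gmem_base >> 14;		/* GMEM base in 16 KB units */
	copy_dest_base  = shadow->gmemshadow.gpuaddr >> 5;	/* destination, 32-byte aligned */
	copy_dest_pitch = (shadow->pitch * 4) / 32;		/* row pitch in 32-byte units */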
*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) | + _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL); + /* RB_COPY_CONTROL */ + *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE, + RB_CLEAR_MODE_RESOLVE) | + _SET(RB_COPYCONTROL_COPY_GMEM_BASE, + tmp_ctx.gmem_base >> 14); + /* RB_COPY_DEST_BASE */ + *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE, + shadow->gmemshadow.gpuaddr >> 5); + /* RB_COPY_DEST_PITCH */ + *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH, + (shadow->pitch * 4) / 32); + /* RB_COPY_DEST_INFO */ + *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE, + RB_TILINGMODE_LINEAR) | + _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) | + _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices.gpuaddr); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_RESERVED2, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) | + _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) | + _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL1REG_RESERVED1, 4); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 
31); + /* HLSQ_CONTROL_3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REQ */ + *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) | + _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) | + _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252); + /* SP_VS_OUT_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 1); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) | + _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG_1 */ + *cmds++ = 
0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUTPUT_REG */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG_0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 1); + /* SP_FS_MRT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_3 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */ + *cmds++ = 0x00000005; *cmds++ = 0x30044b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30244b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLEND_CONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + 
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLEND_CONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLEND_CONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLEND_CONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INSTANCEID_OFFSET */ + *cmds++ = 0x00000000; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CNTL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* 
GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ); + /* GRAS_CL_GB_CLIP_ADJ */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Resolve using two draw calls with a dummy register + * write in between. This is a HLM workaround + * that should be removed later. 
+ */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000000; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000002; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000002; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000003; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +static void build_shader_save_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start; + + /* Reserve space for boolean values used for COND_EXEC packet */ + drawctxt->cond_execs[0].hostptr = cmd; + drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[1].hostptr = cmd; + drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + drawctxt->shader_save_commands[0].hostptr = cmd; + drawctxt->shader_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->shader_save_commands[1].hostptr = cmd; + drawctxt->shader_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + start = cmd; + + /* Save vertex shader */ + + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[2].hostptr = cmd; + drawctxt->shader_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8 + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + src = (HLSQ_SHADOW_BASE + 0x1000)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Save fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[3].hostptr = cmd; + drawctxt->shader_save_commands[3].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8 + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. 
+ + fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32 + From regspec: + + SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]: + First instruction of the whole shader will be stored from + the offset in instruction cache, unit = 256bits, a cache line. + It can start from 0 if no VS available. + + src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* + * Make an IB to modify context save IBs with the correct shader instruction + * and constant sizes and offsets. + */ + +static void build_save_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Flush HLSQ lazy updates */ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + /* Make sure registers are flushed */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[2].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Save shader offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->shader_save_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[1].gpuaddr; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[2].gpuaddr; + + /* Save FS constant offset */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_save_commands[0].gpuaddr; + + + /* Save VS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[0].gpuaddr; + + /* Save FS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[1].gpuaddr; +#else + + /* Shader save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 11+REG_TO_MEM_LOOP_COUNT_SHIFT, + (HLSQ_SHADOW_BASE + 0x1000) / 4, + drawctxt->shader_save_commands[2].gpuaddr); + + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */ + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 ) + | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) | + 
A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x7f000000; /* AND value */ + *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */ + + /* + * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) | + * SP_FS_OBJ_OFFSET_REG + */ + + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */ + /* + * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) | + * 0x00000000 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = A3XX_CP_SCRATCH_REG3; + *cmd++ = 0xfe000000; /* AND value */ + *cmd++ = 0x00000000; /* OR value */ + /* + * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0xffffffff; /* AND value */ + *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Constant save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000) / 4, + drawctxt->constant_save_commands[1].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4, + drawctxt->constant_save_commands[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, drawctxt->gpustate.gpuaddr & 0xfffffe00, + drawctxt->constant_save_commands[2].gpuaddr + + sizeof(unsigned int)); + + /* Modify constant save conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save VS instruction store mode */ + + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[0].gpuaddr); + + /* Save FS instruction store mode */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[1].gpuaddr); + +#endif + + create_ib1(drawctxt, drawctxt->save_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/****************************************************************************/ +/* Functions to build context restore IBs */ +/****************************************************************************/ + +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31); + /* HLSQ_CONTROL3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0); + /* RB_MRT_BUF_INFO0 */ + *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) | + _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH, + (shadow->gmem_pitch * 4 * 8) / 256); + /* RB_MRT_BUF_BASE0 */ + *cmds++ 
= _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5); + + /* Texture samplers */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00000240; + *cmds++ = 0x00000000; + + /* Texture memobjs */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x4cc06880; + *cmds++ = shadow->height | (shadow->width << 14); + *cmds++ = (shadow->pitch*4*8) << 9; + *cmds++ = 0x00000000; + + /* Mipmap bases */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16); + *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = shadow->gmemshadow.gpuaddr; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252) | + _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1); + /* SP_VS_OUT_REG0 */ + *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3); + /* SP_VS_OUT_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG0 */ + *cmds++ 
= _SET(SP_VSVPCDSTREG_OUTLOC0, 8); + /* SP_VS_VPC_DST_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) | + _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 2); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG1 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUT_REG */ + *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 4); + /* SP_FS_MRT_REG1 */ + *cmds++ = 0; + /* SP_FS_MRT_REG2 */ + *cmds++ = 0; + /* SP_FS_MRT_REG3 */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) | + _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) | + _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2); + /* VPC_VARYING_INTERP_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_IINTERP_MODE3 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_PS_REPL_MODE_0 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_1 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + 
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_2 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_3 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + /* Load vertex shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)end; */ + *cmds++ = 0x00000000; *cmds++ = 0x13000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + /* Load fragment shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */ + *cmds++ = 0x00002000; *cmds++ = 0x57368902; + /* (rpt5)nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000500; + /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */ + *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00; + /* (sy)mov.f32f32 r1.x, r0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30044004; + /* mov.f32f32 r1.y, r0.y; */ + *cmds++ = 0x00000001; *cmds++ = 0x20044005; + /* 
mov.f32f32 r1.z, r0.z; */ + *cmds++ = 0x00000002; *cmds++ = 0x20044006; + /* mov.f32f32 r1.w, r0.w; */ + *cmds++ = 0x00000003; *cmds++ = 0x20044007; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) | + _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr); + /* VFD_FETCH_INSTR_0_1 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_1 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr + 16); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1); + /* VFD_DECODE_INSTR_1 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL); + /* RB_STENCIL_CONTROL */ + *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + 
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLENDCONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLENDCONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLENDCONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLENDCONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = 
_SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL); + /* GRAS_SU_MODE_CONTROL */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0); + /* SP_FS_IMAGE_OUTPUT_REG_0 */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CONTROL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST, + PC_DI_SRC_SEL_AUTO_INDEX, + PC_DI_INDEX_SIZE_16_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000002; /* Num indices */ + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + unsigned int *lcc_start; + + int i; + + /* Flush HLSQ lazy updates 
*/ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + lcc_start = cmd; + + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + cmd = reg_range(cmd, context_register_ranges[i * 2], + context_register_ranges[i * 2 + 1]); + } + + lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, + (cmd - lcc_start) - 1); + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + lcc_start[2] |= (1 << 24) | (4 << 16); +#endif + + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type0_packet(global_registers[i], 1); + tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + } + + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_constantrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int mode = 4; /* Indirect mode */ + unsigned int stateblock; + unsigned int numunits; + unsigned int statetype; + + drawctxt->cond_execs[2].hostptr = cmd; + drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[3].hostptr = cmd; + drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; + *cmd++ = 0x0; +#endif + /* HLSQ full update */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */ + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Re-enable shadowing */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = (4 << 16) | (1 << 24); + *cmd++ = 0x0; +#endif + + /* Load vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[0].hostptr = cmd; + drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd, + &drawctxt->gpustate); + + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex constants) + numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
+ + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + *cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1; + + /* Load fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[1].hostptr = cmd; + drawctxt->constant_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment constants) + numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + drawctxt->constant_load_commands[2].hostptr = cmd; + drawctxt->constant_load_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + ord2 = base + offset | 1 + Because of the base alignment we can use + ord2 = base | offset | 1 + */ + *cmd++ = 0; /* ord2 */ + + /* Restore VS texture memory objects */ + stateblock = 0; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore VS texture mipmap addresses */ + stateblock = 1; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore VS texture sampler objects */ + stateblock = 0; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + /* Restore FS texture memory objects */ + stateblock = 2; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore FS texture mipmap addresses */ + stateblock = 3; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore FS texture sampler objects */ + stateblock = 2; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + create_ib1(drawctxt, drawctxt->constant_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void 
build_shader_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Vertex shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[0].hostptr = cmd; + drawctxt->shader_load_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex shader) + numunits = SP_VS_CTRL_REG0.VS_LENGTH + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[1].hostptr = cmd; + drawctxt->shader_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment shader) + numunits = SP_FS_CTRL_REG0.FS_LENGTH + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + create_ib1(drawctxt, drawctxt->shader_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd; + drawctxt->hlsqcontrol_restore_commands[0].gpuaddr + = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* IB that modifies the shader and constant sizes and offsets in restore IBs. 
*/ +static void build_restore_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[1].gpuaddr; + + /* Save constant offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_load_commands[2].gpuaddr; +#else + /* Save shader sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 30, (4 << 19) | (4 << 16), + drawctxt->shader_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000, + 30, (6 << 19) | (4 << 16), + drawctxt->shader_load_commands[1].gpuaddr); + + /* Save constant sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 23, (4 << 19) | (4 << 16), + drawctxt->constant_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 23, (6 << 19) | (4 << 16), + drawctxt->constant_load_commands[1].gpuaddr); + + /* Modify constant restore conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save fragment constant shadow offset */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1, + drawctxt->constant_load_commands[2].gpuaddr); +#endif + + /* Use mask value to avoid flushing HLSQ which would cause the HW to + discard all the shader data */ + + cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff, + 0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr); + + create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + build_regrestore_cmds(adreno_dev, drawctxt); + build_constantrestore_cmds(adreno_dev, drawctxt); + build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt); + build_regconstantsave_cmds(adreno_dev, drawctxt); + build_shader_save_cmds(adreno_dev, drawctxt); + build_shader_restore_cmds(adreno_dev, drawctxt); + build_restore_fixup_cmds(adreno_dev, drawctxt); + build_save_fixup_cmds(adreno_dev, drawctxt); + + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) { + int result = + kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, + drawctxt->context_gmem_shadow.size); + + if (result) + return result; + } else { + 
memset(&drawctxt->context_gmem_shadow.gmemshadow, 0, + sizeof(drawctxt->context_gmem_shadow.gmemshadow)); + + return 0; + } + + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* Dow we need to idle? */ + /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */ + + tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + return 0; +} + +static int a3xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more then just storage for + * the gpustate. This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET; + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) + ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* Fixup self modifying IBs for save operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->save_fixup, 3); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->regconstant_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* Save shader instructions */ + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* + * Save GMEM (note: changes shader. shader must + * already be saved.) + */ + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. 
+ gmem_save, 3); + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default pagetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + + /* + * Restore GMEM. (note: changes shader. + * Shader must not already be restored.) + */ + + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. + gmem_restore, 3); + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_restore, 3); + + /* Fixup self modifying IBs for restore operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->restore_fixup, 3); + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->constant_restore, 3); + + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->shader_restore, 3); + + /* Restore HLSQ_CONTROL_0 register */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->hlsqcontrol_restore, 3); + } +} + +static void a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + cmds = adreno_ringbuffer_allocspace(rb, 18); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17)); + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150); + GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode control - turned off for A3XX */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + const char *err = ""; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + unsigned int reg; + + adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0x3); + + /* Clear the error */ + adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + return; + } + case A3XX_INT_RBBM_REG_TIMEOUT: + err = "RBBM: AHB register timeout"; + break; + case A3XX_INT_RBBM_ME_MS_TIMEOUT: + err = "RBBM: ME master split timeout"; + break; + case A3XX_INT_RBBM_PFP_MS_TIMEOUT: + err = "RBBM: PFP master split timeout"; + break; + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + err = "RBBM: ATB bus oveflow"; + break; + case A3XX_INT_VFD_ERROR: + err = "VFD: Out of bounds access"; + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + err = "ringbuffer TO packet in IB interrupt"; + break; + case A3XX_INT_CP_OPCODE_ERROR: + err = "ringbuffer opcode error interrupt"; + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + err = "ringbuffer reserved bit error interrupt"; + break; + case A3XX_INT_CP_HW_FAULT: + err = "ringbuffer hardware fault"; + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + err = "ringbuffer protected mode error interrupt"; + break; + case A3XX_INT_CP_AHB_ERROR_HALT: + err = "ringbuffer AHB error interrupt"; + break; + case A3XX_INT_MISC_HANG_DETECT: + err = "MISC: GPU hang detected"; + break; + case A3XX_INT_UCHE_OOB_ACCESS: + err = "UCHE: Out of bounds access"; + break; + } + + KGSL_DRV_CRIT(device, "%s\n", err); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); +} + +static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) +{ + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + + if (irq == A3XX_INT_CP_RB_INT) { + unsigned int context_id; + kgsl_sharedmem_readl(&adreno_dev->dev.memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); + wmb(); + } + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + wake_up_interruptible_all(&rb->device->wait_queue); + + /* Schedule work to free mem and issue ibs */ + queue_work(rb->device->work_queue, &rb->device->ts_expired_ws); + + atomic_notifier_call_chain(&rb->device->ts_notifier_list, + rb->device->id, NULL); +} + +#define A3XX_IRQ_CALLBACK(_c) { .func = _c } + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_VFD_ERROR) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_MISC_HANG_DETECT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct { + void (*func)(struct adreno_device *, int); +} a3xx_irq_funcs[] = { + A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */ + A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - 
CP_T0_PACKET_IN_IB */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */ + A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 24 - MISC_HANG_DETECT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + /* 26 to 31 - Unused */ +}; + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t ret = IRQ_NONE; + unsigned int status, tmp; + int i; + + adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status); + + for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) { + if (tmp & 1) { + if (a3xx_irq_funcs[i].func != NULL) { + a3xx_irq_funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else { + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + } + } + + tmp >>= 1; + } + + if (status) + adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD, + status); + return ret; +} + +static void a3xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK); + else + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0); +} + +static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regread(device, A3XX_RBBM_RBBM_CTL, ®); + reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Read the value */ + adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val); + + /* Reset the counter */ + reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Re-enable the counter */ + reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1; + reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + return val; +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* Reset the core */ + adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, + 0x00000001); + msleep(20); + + /* + * enable fixed master AXI port of 0x0 for all clients to keep + * traffic from going to random places + */ + + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Enable the RBBM error reporting bits. 
This lets us get + useful information on failure */ + + adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000); +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_rbbm_status = A3XX_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA, + + .ctxt_create = a3xx_drawctxt_create, + .ctxt_save = a3xx_drawctxt_save, + .ctxt_restore = a3xx_drawctxt_restore, + .rb_init = a3xx_rb_init, + .irq_control = a3xx_irq_control, + .irq_handler = a3xx_irq_handler, + .busy_cycles = a3xx_busy_cycles, + .start = a3xx_start, +}; diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c old mode 100644 new mode 100755 index c878a2c2..9c9ee02c --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -223,21 +223,24 @@ static int kgsl_regread_nolock(struct kgsl_device *device, return 0; } -#define KGSL_ISTORE_START 0x5000 -#define KGSL_ISTORE_LENGTH 0x600 +#define ADRENO_ISTORE_START 0x5000 static ssize_t kgsl_istore_read( struct file *file, char __user *buff, size_t buff_count, loff_t *ppos) { - int i, count = KGSL_ISTORE_LENGTH, remaining, pos = 0, tot = 0; + int i, count, remaining, pos = 0, tot = 0; struct kgsl_device *device = file->private_data; const int rowc = 8; + struct adreno_device *adreno_dev; if (!ppos || !device) return 0; + adreno_dev = ADRENO_DEVICE(device); + count = adreno_dev->istore_size * adreno_dev->instruction_size; + remaining = count; for (i = 0; i < count; i += rowc) { unsigned int vals[rowc]; @@ -248,7 +251,8 @@ static ssize_t kgsl_istore_read( if (pos >= *ppos) { for (j = 0; j < linec; ++j) kgsl_regread_nolock(device, - KGSL_ISTORE_START+i+j, vals+j); + ADRENO_ISTORE_START + i + j, + vals + j); } else memset(vals, 0, sizeof(vals)); @@ -440,6 +444,8 @@ void adreno_debugfs_init(struct kgsl_device *device) &kgsl_cff_dump_enable_fops); debugfs_create_u32("wait_timeout", 0644, device->d_debugfs, &adreno_dev->wait_timeout); + debugfs_create_u32("ib_check", 0644, device->d_debugfs, + &adreno_dev->ib_check_level); /* Create post mortem control files */ diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c old mode 100644 new mode 100755 index b7b0ea46..fc4789ad --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,8 +17,11 @@ #include "kgsl_sharedmem.h" #include "adreno.h" +#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF + /* quad for copying GMEM to context shadow */ #define QUAD_LEN 12 +#define QUAD_RESTORE_LEN 14 static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000, @@ -27,6 +30,14 @@ static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000 }; +static unsigned int gmem_restore_quad[QUAD_RESTORE_LEN] = { + 0x00000000, 0x3f800000, 0x3f800000, + 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x3f800000, +}; + #define TEXCOORD_LEN 8 static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = { @@ -73,12 +84,12 @@ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) gmem_copy_quad[4] = uint2float(shadow->height); gmem_copy_quad[9] = uint2float(shadow->width); - gmem_copy_quad[0] = 0; - gmem_copy_quad[6] = 0; - gmem_copy_quad[7] = 0; - gmem_copy_quad[10] = 0; + gmem_restore_quad[5] = uint2float(shadow->height); + gmem_restore_quad[7] = uint2float(shadow->width); memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2); + memcpy(shadow->quad_vertices_restore.hostptr, gmem_copy_quad, + QUAD_RESTORE_LEN << 2); memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, TEXCOORD_LEN << 2); @@ -103,6 +114,13 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, cmd += QUAD_LEN; + /* Used by A3XX, but define for both to make the code easier */ + shadow->quad_vertices_restore.hostptr = cmd; + shadow->quad_vertices_restore.gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + + cmd += QUAD_RESTORE_LEN; + /* tex coord buffer location (in GPU space) */ shadow->quad_texcoords.hostptr = cmd; shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); @@ -138,28 +156,28 @@ int adreno_drawctxt_create(struct kgsl_device *device, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; + drawctxt->id = context->id; - /* FIXME: Deal with preambles */ + if (flags & KGSL_CONTEXT_PREAMBLE) + drawctxt->flags |= CTXT_FLAGS_PREAMBLE; - ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt); + if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC) + drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; + + if (flags & KGSL_CONTEXT_PER_CONTEXT_TS) + drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS; + + ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt); if (ret) - goto err; - - /* Save the shader instruction memory on context switching */ - drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; - - if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) { - /* create gmem shadow */ - ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev, - drawctxt); - if (ret != 0) goto err; - } + + kgsl_sharedmem_writel(&device->memstore, + KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts), + KGSL_INIT_REFTIMESTAMP); context->devctxt = drawctxt; return 0; err: - kgsl_sharedmem_free(&drawctxt->gpustate); kfree(drawctxt); return ret; } @@ -179,11 +197,12 @@ void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_context *drawctxt = context->devctxt; + struct adreno_context *drawctxt; - if (drawctxt == NULL) + if (context == NULL) return; + drawctxt = context->devctxt; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { /* no need to save GMEM or 
shader, the context is @@ -261,6 +280,6 @@ void adreno_drawctxt_switch(struct adreno_device *adreno_dev, adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); /* Set the new context */ - adreno_dev->drawctxt_active = drawctxt; adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + adreno_dev->drawctxt_active = drawctxt; } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h old mode 100644 new mode 100755 index 3c3a8536..61198ebd --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -30,12 +30,20 @@ #define CTXT_FLAGS_GMEM_SAVE 0x00000200 /* gmem can be restored from shadow */ #define CTXT_FLAGS_GMEM_RESTORE 0x00000400 +/* preamble packed in cmdbuffer for context switching */ +#define CTXT_FLAGS_PREAMBLE 0x00000800 /* shader must be copied to shadow */ #define CTXT_FLAGS_SHADER_SAVE 0x00002000 /* shader can be restored from shadow */ #define CTXT_FLAGS_SHADER_RESTORE 0x00004000 /* Context has caused a GPU hang */ #define CTXT_FLAGS_GPU_HANG 0x00008000 +/* Specifies there is no need to save GMEM */ +#define CTXT_FLAGS_NOGMEMALLOC 0x00010000 +/* Trash state for context */ +#define CTXT_FLAGS_TRASHSTATE 0x00020000 +/* per context timestamps enabled */ +#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000 struct kgsl_device; struct adreno_device; @@ -46,37 +54,57 @@ struct kgsl_context; struct gmem_shadow_t { struct kgsl_memdesc gmemshadow; /* Shadow buffer address */ - /* 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x - * 256 rows. */ - /* width & height must be a multiples of 32, in case tiled textures - * are used. */ - enum COLORFORMATX format; + /* + * 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x + * 256 rows. 
Width & height must be multiples of 32 in case tiled + * textures are used + */ + + enum COLORFORMATX format; /* Unused on A3XX */ unsigned int size; /* Size of surface used to store GMEM */ unsigned int width; /* Width of surface used to store GMEM */ unsigned int height; /* Height of surface used to store GMEM */ unsigned int pitch; /* Pitch of surface used to store GMEM */ unsigned int gmem_pitch; /* Pitch value used for GMEM */ - unsigned int *gmem_save_commands; - unsigned int *gmem_restore_commands; + unsigned int *gmem_save_commands; /* Unused on A3XX */ + unsigned int *gmem_restore_commands; /* Unused on A3XX */ unsigned int gmem_save[3]; unsigned int gmem_restore[3]; struct kgsl_memdesc quad_vertices; struct kgsl_memdesc quad_texcoords; + struct kgsl_memdesc quad_vertices_restore; }; struct adreno_context { + unsigned int id; uint32_t flags; struct kgsl_pagetable *pagetable; struct kgsl_memdesc gpustate; - unsigned int reg_save[3]; unsigned int reg_restore[3]; unsigned int shader_save[3]; - unsigned int shader_fixup[3]; unsigned int shader_restore[3]; - unsigned int chicken_restore[3]; - unsigned int bin_base_offset; + /* Information of the GMEM shadow that is created in context create */ struct gmem_shadow_t context_gmem_shadow; + + /* A2XX specific items */ + unsigned int reg_save[3]; + unsigned int shader_fixup[3]; + unsigned int chicken_restore[3]; + unsigned int bin_base_offset; + + /* A3XX specific items */ + unsigned int regconstant_save[3]; + unsigned int constant_restore[3]; + unsigned int hlsqcontrol_restore[3]; + unsigned int save_fixup[3]; + unsigned int restore_fixup[3]; + struct kgsl_memdesc shader_load_commands[2]; + struct kgsl_memdesc shader_save_commands[4]; + struct kgsl_memdesc constant_save_commands[3]; + struct kgsl_memdesc constant_load_commands[3]; + struct kgsl_memdesc cond_execs[4]; + struct kgsl_memdesc hlsqcontrol_restore_commands[1]; }; int adreno_drawctxt_create(struct kgsl_device *device, diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h old mode 100644 new mode 100755 index 8aea58c9..9340f691 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -29,11 +29,6 @@ /* skip N 32-bit words to get to the next packet */ #define CP_NOP 0x10 -/* indirect buffer dispatch. prefetch parser uses this packet type to determine -* whether to pre-fetch the IB -*/ -#define CP_INDIRECT_BUFFER 0x3f - /* indirect buffer dispatch. 
same as IB, but init is pipelined */ #define CP_INDIRECT_BUFFER_PFD 0x37 @@ -117,6 +112,9 @@ /* load constants from a location in memory */ #define CP_LOAD_CONSTANT_CONTEXT 0x2e +/* (A2x) sets binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + /* selective invalidation of state pointers */ #define CP_INVALIDATE_STATE 0x3b @@ -157,6 +155,25 @@ #define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 /* packet header building macros */ #define cp_type0_packet(regindx, cnt) \ @@ -178,11 +195,20 @@ #define cp_nop_packet(cnt) \ (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8)) +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +#define pkt_is_type3(pkt) (((pkt) & 0xC0000000) == CP_TYPE3_PKT) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) /* packet headers */ #define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18) #define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) -#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2) +#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) /* dword base address of the GFX decode space */ #define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) @@ -190,4 +216,14 @@ /* gmem command buffer length */ #define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(unsigned int cmd) +{ + return (cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFD, 2)); +} + #endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c old mode 100644 new mode 100755 index 3d957f69..427741f1 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -14,6 +14,7 @@ #include #include "kgsl.h" +#include "kgsl_sharedmem.h" #include "adreno.h" #include "adreno_pm4types.h" @@ -52,7 +53,7 @@ static const struct pm_id_name pm3_types[] = { {CP_IM_LOAD, "IN__LOAD"}, {CP_IM_LOAD_IMMEDIATE, "IM_LOADI"}, {CP_IM_STORE, "IM_STORE"}, - {CP_INDIRECT_BUFFER, "IND_BUF_"}, + {CP_INDIRECT_BUFFER_PFE, "IND_BUF_"}, {CP_INDIRECT_BUFFER_PFD, "IND_BUFP"}, {CP_INTERRUPT, "PM4_INTR"}, {CP_INVALIDATE_STATE, "INV_STAT"}, @@ -247,9 +248,8 @@ static void adreno_dump_regs(struct kgsl_device *device, static void dump_ib(struct kgsl_device *device, char* buffId, uint32_t pt_base, uint32_t base_offset, uint32_t ib_base, uint32_t ib_size, bool dump) { - unsigned int memsize; - uint8_t *base_addr = kgsl_sharedmem_convertaddr(device, 
pt_base, - ib_base, &memsize); + uint8_t *base_addr = adreno_convertaddr(device, pt_base, + ib_base, ib_size*sizeof(uint32_t)); if (base_addr && dump) print_hex_dump(KERN_ERR, buffId, DUMP_PREFIX_OFFSET, @@ -277,20 +277,19 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, int i, j; uint32_t value; uint32_t *ib1_addr; - unsigned int memsize; dump_ib(device, "IB1:", pt_base, base_offset, ib1_base, ib1_size, dump); /* fetch virtual address for given IB base */ - ib1_addr = (uint32_t *)kgsl_sharedmem_convertaddr(device, pt_base, - ib1_base, &memsize); + ib1_addr = (uint32_t *)adreno_convertaddr(device, pt_base, + ib1_base, ib1_size*sizeof(uint32_t)); if (!ib1_addr) return; for (i = 0; i+3 < ib1_size; ) { value = ib1_addr[i++]; - if (value == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(value)) { uint32_t ib2_base = ib1_addr[i++]; uint32_t ib2_size = ib1_addr[i++]; @@ -466,7 +465,9 @@ static int adreno_dump(struct kgsl_device *device) const uint32_t *rb_vaddr; int num_item = 0; int read_idx, write_idx; - unsigned int ts_processed, rb_memsize; + unsigned int ts_processed = 0xdeaddead; + struct kgsl_context *context; + unsigned int context_id; static struct ib_list ib_list; @@ -662,9 +663,18 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "MH_INTERRUPT: MASK = %08X | STATUS = %08X\n", r1, r2); - ts_processed = device->ftbl->readtimestamp(device, + kgsl_sharedmem_readl(&device->memstore, + (unsigned int *) &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context) { + ts_processed = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); - KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed); + KGSL_LOG_DUMP(device, "CTXT: %d TIMESTM RTRD: %08X\n", + context->id, ts_processed); + } else + KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id); num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer, cp_rb_rptr); @@ -681,11 +691,16 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "RB: rd_addr:%8.8x rb_size:%d num_item:%d\n", cp_rb_base, rb_count<<2, num_item); - rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, - cur_pt_base, cp_rb_base, &rb_memsize); + + if (adreno_dev->ringbuffer.buffer_desc.gpuaddr != cp_rb_base) + KGSL_LOG_POSTMORTEM_WRITE(device, + "rb address mismatch, should be 0x%08x\n", + adreno_dev->ringbuffer.buffer_desc.gpuaddr); + + rb_vaddr = adreno_dev->ringbuffer.buffer_desc.hostptr; if (!rb_vaddr) { KGSL_LOG_POSTMORTEM_WRITE(device, - "Can't fetch vaddr for CP_RB_BASE\n"); + "rb has no kernel mapping!\n"); goto error_vfree; } @@ -711,7 +726,7 @@ static int adreno_dump(struct kgsl_device *device) i = 0; for (read_idx = 0; read_idx < num_item; ) { uint32_t this_cmd = rb_copy[read_idx++]; - if (this_cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx++]; uint32_t ib_size = rb_copy[read_idx++]; dump_ib1(device, cur_pt_base, (read_idx-3)<<2, ib_addr, @@ -743,8 +758,7 @@ static int adreno_dump(struct kgsl_device *device) for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY; read_idx >= 0; --read_idx) { uint32_t this_cmd = rb_copy[read_idx]; - if (this_cmd == cp_type3_packet( - CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx+1]; uint32_t ib_size = rb_copy[read_idx+2]; if (ib_size && cp_ib1_base == ib_addr) { diff --git 
a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c old mode 100644 new mode 100755 index d59057c8..da80576f --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,30 +22,14 @@ #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_ringbuffer.h" +#include "adreno_debugfs.h" #include "a2xx_reg.h" +#include "a3xx_reg.h" #define GSL_RB_NOP_SIZEDWORDS 2 -/* protected mode error checking below register address 0x800 -* note: if CP_INTERRUPT packet is used then checking needs -* to change to below register address 0x7C8 -*/ -#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 -/* Firmware file names - * Legacy names must remain but replacing macro names to - * match current kgsl model. - * a200 is yamato - * a220 is leia - */ -#define A200_PFP_FW "yamato_pfp.fw" -#define A200_PM4_FW "yamato_pm4.fw" -#define A220_PFP_470_FW "leia_pfp_470.fw" -#define A220_PM4_470_FW "leia_pm4_470.fw" -#define A225_PFP_FW "a225_pfp.fw" -#define A225_PM4_FW "a225_pm4.fw" - -static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) { BUG_ON(rb->wptr == 0); @@ -104,8 +88,7 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, } while ((freecmds != 0) && (freecmds <= numcmds)); } - -static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, unsigned int numcmds) { unsigned int *ptr = NULL; @@ -231,9 +214,10 @@ static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n", adreno_dev->pfp_fw[0]); - adreno_regwrite(device, REG_CP_PFP_UCODE_ADDR, 0); + adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0); for (i = 1; i < adreno_dev->pfp_fw_size; i++) - adreno_regwrite(device, REG_CP_PFP_UCODE_DATA, + adreno_regwrite(device, + adreno_dev->gpudev->reg_cp_pfp_ucode_data, adreno_dev->pfp_fw[i]); err: return ret; @@ -244,15 +228,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) int status; /*cp_rb_cntl_u cp_rb_cntl; */ union reg_cp_rb_cntl cp_rb_cntl; - unsigned int *cmds, rb_cntl; + unsigned int rb_cntl; struct kgsl_device *device = rb->device; - uint cmds_gpu; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); if (rb->flags & KGSL_FLAGS_STARTED) return 0; if (init_ram) { - rb->timestamp = 0; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0; GSL_RB_INIT_TIMESTAMP(rb); } @@ -262,12 +246,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) kgsl_sharedmem_set(&rb->buffer_desc, 0, 0xAA, (rb->sizedwords << 2)); + if (adreno_is_a2xx(adreno_dev)) { adreno_regwrite(device, REG_CP_RB_WPTR_BASE, (rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_WPTRPOLL_OFFSET)); /* setup WPTR delay */ - adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, 0 /*0x70000010 */); + adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, + 0 /*0x70000010 */); + } /*setup REG_CP_RB_CNTL */ adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl); @@ -286,7 +273,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) */ cp_rb_cntl.f.rb_blksz = 
ilog2(KGSL_RB_BLKSIZE >> 3); - cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; /* WPTR polling */ + if (adreno_is_a2xx(adreno_dev)) { + /* WPTR polling */ + cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; + } + /* mem RPTR writebacks */ cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE; @@ -298,13 +289,41 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_RPTR_OFFSET); + if (adreno_is_a3xx(adreno_dev)) { + /* enable access protection to privileged registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_3, 0x60000108); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400); + + /* CP registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180); + + /* RB registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300); + + /* VBIF registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000); + } + + if (adreno_is_a2xx(adreno_dev)) { /* explicitly clear all cp interrupts */ adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF); + } /* setup scratch/timestamp */ - adreno_regwrite(device, REG_SCRATCH_ADDR, - device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); + adreno_regwrite(device, REG_SCRATCH_ADDR, device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + soptimestamp)); adreno_regwrite(device, REG_SCRATCH_UMSK, GSL_RB_MEMPTRS_SCRATCH_MASK); @@ -328,54 +347,8 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) /* clear ME_HALT to start micro engine */ adreno_regwrite(device, REG_CP_ME_CNTL, 0); - /* ME_INIT */ - cmds = adreno_ringbuffer_allocspace(rb, 19); - cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); - - GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT); - /* All fields present (bits 9:0) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); - /* Disable/Enable Real-Time Stream processing (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); - - /* Vertex and Pixel Shader Start Addresses in instructions - * (3 DWORDS per instruction) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x80000180); - /* Maximum Contexts */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); - /* Write Confirm Interval and The CP will wait the - * 
wait_interval * 16 clocks between polling */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - /* NQ and External Memory Swap */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Protected mode error checking */ - GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); - /* Disable header dumping and Header dump address */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Header dump size */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - adreno_ringbuffer_submit(rb); + /* ME init is GPU specific, so jump into the sub-function */ + adreno_dev->gpudev->rb_init(adreno_dev, rb); /* idle device to validate ME INIT */ status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); @@ -391,7 +364,6 @@ void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) if (rb->flags & KGSL_FLAGS_STARTED) { /* ME_HALT */ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000); - rb->flags &= ~KGSL_FLAGS_STARTED; } } @@ -454,14 +426,28 @@ void adreno_ringbuffer_close(struct adreno_ringbuffer *rb) static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + struct adreno_context *context, unsigned int flags, unsigned int *cmds, int sizedwords) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; - unsigned int total_sizedwords = sizedwords + 6; + unsigned int total_sizedwords = sizedwords; unsigned int i; unsigned int rcmd_gpu; + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + unsigned int gpuaddr = rb->device->memstore.gpuaddr; + + if (context != NULL) { + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. + */ + if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = context->id; + } /* reserve space to temporarily turn off protected mode * error checking if needed @@ -470,6 +456,16 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0; + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 7; + + total_sizedwords += 2; /* scratchpad ts for recovery */ + if (context) { + total_sizedwords += 3; /* sop timestamp */ + total_sizedwords += 4; /* eop timestamp */ + } + total_sizedwords += 4; /* global timestamp for recovery*/ + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); @@ -497,28 +493,70 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } - rb->timestamp++; - timestamp = rb->timestamp; + /* always increment the global timestamp. once. 
*/ + rb->timestamp[KGSL_MEMSTORE_GLOBAL]++; + if (context) { + if (context_id == KGSL_MEMSTORE_GLOBAL) + rb->timestamp[context_id] = + rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + else + rb->timestamp[context_id]++; + } + timestamp = rb->timestamp[context_id]; - /* start-of-pipeline and end-of-pipeline timestamps */ + /* scratchpad ts for recovery */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); + + if (adreno_is_a3xx(adreno_dev)) { + /* + * FLush HLSQ lazy updates to make sure there are no + * rsources pending for indirect loads after the timestamp + */ + + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); + } + + if (context) { + /* start-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_MEM_WRITE, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + + /* end-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 3)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + } + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); - GSL_RB_WRITE(ringcmds, rcmd_gpu, - (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp))); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_COND_EXEC, 4)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ts_cmp_enable)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ref_wait_ts)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -526,9 +564,17 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_SET_CONSTANT, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, + (0x4<<16)|(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); + } + adreno_ringbuffer_submit(rb); - /* return timestamp of issued coREG_ands */ return timestamp; } @@ -543,7 +589,199 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, if (device->state & KGSL_STATE_HUNG) return; - adreno_ringbuffer_addcmds(rb, flags, cmds, 
sizedwords); + adreno_ringbuffer_addcmds(rb, NULL, flags, cmds, sizedwords); +} + +static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, + int sizedwords); + +static bool +_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int opcode = cp_type3_opcode(*hostaddr); + switch (opcode) { + case CP_INDIRECT_BUFFER_PFD: + case CP_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFD: + return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]); + case CP_NOP: + case CP_WAIT_FOR_IDLE: + case CP_WAIT_REG_MEM: + case CP_WAIT_REG_EQ: + case CP_WAT_REG_GTE: + case CP_WAIT_UNTIL_READ: + case CP_WAIT_IB_PFD_COMPLETE: + case CP_REG_RMW: + case CP_REG_TO_MEM: + case CP_MEM_WRITE: + case CP_MEM_WRITE_CNTR: + case CP_COND_EXEC: + case CP_COND_WRITE: + case CP_EVENT_WRITE: + case CP_EVENT_WRITE_SHD: + case CP_EVENT_WRITE_CFL: + case CP_EVENT_WRITE_ZPD: + case CP_DRAW_INDX: + case CP_DRAW_INDX_2: + case CP_DRAW_INDX_BIN: + case CP_DRAW_INDX_2_BIN: + case CP_VIZ_QUERY: + case CP_SET_STATE: + case CP_SET_CONSTANT: + case CP_IM_LOAD: + case CP_IM_LOAD_IMMEDIATE: + case CP_LOAD_CONSTANT_CONTEXT: + case CP_INVALIDATE_STATE: + case CP_SET_SHADER_BASES: + case CP_SET_BIN_MASK: + case CP_SET_BIN_SELECT: + case CP_SET_BIN_BASE_OFFSET: + case CP_SET_BIN_DATA: + case CP_CONTEXT_UPDATE: + case CP_INTERRUPT: + case CP_IM_STORE: + case CP_LOAD_STATE: + break; + /* these shouldn't come from userspace */ + case CP_ME_INIT: + case CP_SET_PROTECTED_MODE: + default: + KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode); + return false; + break; + } + + return true; +} + +static bool +_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int reg = type0_pkt_offset(*hostaddr); + unsigned int cnt = type0_pkt_size(*hostaddr); + if (reg < 0x0192 || (reg + cnt) >= 0x8000) { + KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n", + reg, cnt); + return false; + } + return true; +} + +/* + * Traverse IBs and dump them to test vector. 
Detect swap by inspecting + * register writes, keeping note of the current state, and dump + * framebuffer config to test vector + */ +static bool _parse_ibs(struct kgsl_device_private *dev_priv, + uint gpuaddr, int sizedwords) +{ + static uint level; /* recursion level */ + bool ret = false; + uint *hostaddr, *hoststart; + int dwords_left = sizedwords; /* dwords left in the current command + buffer */ + struct kgsl_mem_entry *entry; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find_region(dev_priv->process_priv, + gpuaddr, sizedwords * sizeof(uint)); + spin_unlock(&dev_priv->process_priv->mem_lock); + if (entry == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (hostaddr == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hoststart = hostaddr; + + level++; + + KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", + gpuaddr, sizedwords, hostaddr); + + mb(); + while (dwords_left > 0) { + bool cur_ret = true; + int count = 0; /* dword count including packet header */ + + switch (*hostaddr >> 30) { + case 0x0: /* type-0 */ + count = (*hostaddr >> 16)+2; + cur_ret = _handle_type0(dev_priv, hostaddr); + break; + case 0x1: /* type-1 */ + count = 2; + break; + case 0x3: /* type-3 */ + count = ((*hostaddr >> 16) & 0x3fff) + 2; + cur_ret = _handle_type3(dev_priv, hostaddr); + break; + default: + KGSL_CMD_ERR(dev_priv->device, "unexpected type: " + "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", + *hostaddr >> 30, *hostaddr, hostaddr, + gpuaddr+4*(sizedwords-dwords_left)); + cur_ret = false; + count = dwords_left; + break; + } + + if (!cur_ret) { + KGSL_CMD_ERR(dev_priv->device, + "bad sub-type: #:%d/%d, v:0x%08x" + " @ 0x%p[gb:0x%08x], level:%d\n", + sizedwords-dwords_left, sizedwords, *hostaddr, + hostaddr, gpuaddr+4*(sizedwords-dwords_left), + level); + + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + + /* jump to next packet */ + dwords_left -= count; + hostaddr += count; + if (dwords_left < 0) { + KGSL_CMD_ERR(dev_priv->device, + "bad count: c:%d, #:%d/%d, " + "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", + count, sizedwords-(dwords_left+count), + sizedwords, *(hostaddr-count), hostaddr-count, + gpuaddr+4*(sizedwords-(dwords_left+count)), + level); + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + } + + ret = true; +done: + if (!ret) + KGSL_DRV_ERR(dev_priv->device, + "parsing failed: gpuaddr:0x%08x, " + "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); + + level--; + + return ret; } int @@ -560,6 +798,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int *cmds; unsigned int i; struct adreno_context *drawctxt; + unsigned int start_index = 0; if (device->state & KGSL_STATE_HUNG) return -EBUSY; @@ -571,26 +810,52 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) { KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.." 
- " will not accept commands for this context\n", - drawctxt); + " will not accept commands for context %d\n", + drawctxt, drawctxt->id); return -EDEADLK; } - link = kzalloc(sizeof(unsigned int) * numibs * 3, GFP_KERNEL); - cmds = link; + + cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4), + GFP_KERNEL); if (!link) { - KGSL_MEM_ERR(device, "Failed to allocate memory for for command" - " submission, size %x\n", numibs * 3); + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(unsigned int) * (numibs * 3 + 4)); return -ENOMEM; } - for (i = 0; i < numibs; i++) { - (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); + /*When preamble is enabled, the preamble buffer with state restoration + commands are stored in the first node of the IB chain. We can skip that + if a context switch hasn't occured */ + + if (drawctxt->flags & CTXT_FLAGS_PREAMBLE && + adreno_dev->drawctxt_active == drawctxt) + start_index = 1; + + if (!start_index) { + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + } else { + *cmds++ = cp_nop_packet(4); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; + *cmds++ = ibdesc[0].gpuaddr; + *cmds++ = ibdesc[0].sizedwords; + } + for (i = start_index; i < numibs; i++) { + if (unlikely(adreno_dev->ib_check_level >= 1 && + !_parse_ibs(dev_priv, ibdesc[i].gpuaddr, + ibdesc[i].sizedwords))) { + kfree(link); + return -EINVAL; + } *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; } + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + kgsl_setstate(device, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); @@ -598,6 +863,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, adreno_drawctxt_switch(adreno_dev, drawctxt, flags); *timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer, + drawctxt, KGSL_CMD_FLAGS_NOT_KERNEL_CMD, &link[0], (cmds - link)); @@ -631,12 +897,25 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, unsigned int val2; unsigned int val3; unsigned int copy_rb_contents = 0; - unsigned int cur_context; - unsigned int j; + struct kgsl_context *context; + unsigned int context_id; GSL_RB_GET_READPTR(rb, &rb->rptr); - retired_timestamp = device->ftbl->readtimestamp(device, + /* current_context is the context that is presently active in the + * GPU, i.e the context in which the hang is caused */ + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + KGSL_DRV_ERR(device, "Last context id: %d\n", context_id); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, + "GPU recovery from hang not possible because last" + " context id is invalid.\n"); + return -EINVAL; + } + retired_timestamp = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n", retired_timestamp); @@ -671,7 +950,8 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, (val1 == cp_type3_packet(CP_EVENT_WRITE, 3) && val2 == CACHE_FLUSH_TS && val3 == (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)))) { + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)))) { rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); KGSL_DRV_ERR(device, @@ -717,10 +997,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, return -EINVAL; } - /* current_context is the 
context that is presently active in the - * GPU, i.e the context in which the hang is caused */ - kgsl_sharedmem_readl(&device->memstore, &cur_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) { kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, @@ -735,12 +1011,25 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); BUG_ON(val1 != (device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context))); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context))); kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); - BUG_ON((copy_rb_contents == 0) && - (value == cur_context)); + + /* + * If other context switches were already lost and + * and the current context is the one that is hanging, + * then we cannot recover. Print an error message + * and leave. + */ + + if ((copy_rb_contents == 0) && (value == context_id)) { + KGSL_DRV_ERR(device, "GPU recovery could not " + "find the previous context\n"); + return -EINVAL; + } + /* * If we were copying the commands and got to this point * then we need to remove the 3 commands that appear @@ -751,7 +1040,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, /* if context switches to a context that did not cause * hang then start saving the rb contents as those * commands can be executed */ - if (value != cur_context) { + if (value != context_id) { copy_rb_contents = 1; temp_rb_buffer[temp_idx++] = cp_nop_packet(1); temp_rb_buffer[temp_idx++] = @@ -771,19 +1060,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, } *rb_size = temp_idx; - KGSL_DRV_ERR(device, "Extracted rb contents, size: %x\n", *rb_size); - for (temp_idx = 0; temp_idx < *rb_size;) { - char str[80]; - int idx = 0; - if ((temp_idx + 8) <= *rb_size) - j = 8; - else - j = *rb_size - temp_idx; - for (; j != 0; j--) - idx += scnprintf(str + idx, 80 - idx, - "%8.8X ", temp_rb_buffer[temp_idx++]); - printk(KERN_ALERT "%s", str); - } return 0; } diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h old mode 100644 new mode 100755 index 3e7a6880..bfac915c --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -14,10 +13,6 @@ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H -#define GSL_RB_USE_MEM_RPTR -#define GSL_RB_USE_MEM_TIMESTAMP -#define GSL_DEVICE_SHADOW_MEMSTORE_TO_USER - /* * Adreno ringbuffer sizes in bytes - these are converted to * the appropriate log2 values in the code @@ -59,52 +54,39 @@ struct adreno_ringbuffer { unsigned int wptr; /* write pointer offset in dwords from baseaddr */ unsigned int rptr; /* read pointer offset in dwords from baseaddr */ - uint32_t timestamp; + + unsigned int timestamp[KGSL_MEMSTORE_MAX]; }; #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ - writel_relaxed(data, ring); \ + *ring = data; \ wmb(); \ kgsl_cffdump_setmem(gpuaddr, data, 4); \ ring++; \ gpuaddr += sizeof(uint); \ } while (0) -/* timestamp */ -#ifdef GSL_DEVICE_SHADOW_MEMSTORE_TO_USER -#define GSL_RB_USE_MEM_TIMESTAMP -#endif /* GSL_DEVICE_SHADOW_MEMSTORE_TO_USER */ - -#ifdef GSL_RB_USE_MEM_TIMESTAMP /* enable timestamp (...scratch0) memory shadowing */ #define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 #define GSL_RB_INIT_TIMESTAMP(rb) -#else -#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x0 -#define GSL_RB_INIT_TIMESTAMP(rb) \ - adreno_regwrite((rb)->device->id, REG_CP_TIMESTAMP, 0) - -#endif /* GSL_RB_USE_MEMTIMESTAMP */ - /* mem rptr */ -#ifdef GSL_RB_USE_MEM_RPTR #define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */ #define GSL_RB_GET_READPTR(rb, data) \ do { \ - *(data) = readl_relaxed(&(rb)->memptrs->rptr); \ + *(data) = rb->memptrs->rptr; \ } while (0) -#else -#define GSL_RB_CNTL_NO_UPDATE 0x1 /* disable */ -#define GSL_RB_GET_READPTR(rb, data) \ - do { \ - adreno_regread((rb)->device->id, REG_CP_RB_RPTR, (data)); \ - } while (0) -#endif /* GSL_RB_USE_MEMRPTR */ #define GSL_RB_CNTL_POLL_EN 0x0 /* disable */ +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, @@ -126,6 +108,8 @@ void adreno_ringbuffer_issuecmds(struct kgsl_device *device, unsigned int *cmdaddr, int sizedwords); +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb); + void kgsl_cp_intrcallback(struct kgsl_device *device); int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, @@ -136,6 +120,9 @@ void adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, int num_rb_contents); +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, unsigned int rptr) { diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c old mode 100644 new mode 100755 index e21ca09c..491944ba --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,6 +24,7 @@ #include #include +#include #include "kgsl.h" #include "kgsl_debugfs.h" @@ -45,7 +45,7 @@ module_param_named(mmutype, ksgl_mmu_type, charp, 0); MODULE_PARM_DESC(ksgl_mmu_type, "Type of MMU to be used for graphics. Valid values are 'iommu' or 'gpummu' or 'nommu'"); -#ifdef CONFIG_GENLOCK +static struct ion_client *kgsl_ion_client; /** * kgsl_add_event - Add a new timstamp event for the KGSL device @@ -53,25 +53,35 @@ MODULE_PARM_DESC(ksgl_mmu_type, * @ts - the timestamp to trigger the event on * @cb - callback function to call when the timestamp expires * @priv - private data for the specific event type + * @owner - driver instance that owns this event * * @returns - 0 on success or error code on failure */ -static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv) +static int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, + void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv, + struct kgsl_device_private *owner) { struct kgsl_event *event; struct list_head *n; - unsigned int cur = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); + unsigned int cur_ts; + struct kgsl_context *context = NULL; if (cb == NULL) return -EINVAL; + if (id != KGSL_MEMSTORE_GLOBAL) { + context = idr_find(&device->context_idr, id); + if (context == NULL) + return -EINVAL; + } + cur_ts = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + /* Check to see if the requested timestamp has already fired */ - if (timestamp_cmp(cur, ts) >= 0) { - cb(device, priv, cur); + if (timestamp_cmp(cur_ts, ts) >= 0) { + cb(device, priv, id, cur_ts); return 0; } @@ -79,16 +89,24 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (event == NULL) return -ENOMEM; + event->context = context; event->timestamp = ts; event->priv = priv; event->func = cb; + event->owner = owner; - /* Add the event in order to the list */ + /* + * Add the event in order to the list. Order is by context id + * first and then by timestamp for that context. + */ for (n = device->events.next ; n != &device->events; n = n->next) { struct kgsl_event *e = list_entry(n, struct kgsl_event, list); + if (e->context != context) + continue; + if (timestamp_cmp(e->timestamp, ts) > 0) { list_add(&event->list, n->prev); break; @@ -97,10 +115,77 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (n == &device->events) list_add_tail(&event->list, &device->events); - + + queue_work(device->work_queue, &device->ts_expired_ws); return 0; } -#endif + +/** + * kgsl_cancel_events - Cancel all events for a process + * @device - KGSL device for the events to cancel + * @owner - driver instance that owns the events to cancel + * + */ +static void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_device_private *owner) +{ + struct kgsl_event *event, *event_tmp; + unsigned int id, cur; + + list_for_each_entry_safe(event, event_tmp, &device->events, list) { + if (event->owner != owner) + continue; + + cur = device->ftbl->readtimestamp(device, event->context, + KGSL_TIMESTAMP_RETIRED); + + id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL; + /* + * "cancel" the events by calling their callback. + * Currently, events are used for lock and memory + * management, so if the process is dying the right + * thing to do is release or free. 
+ */ + if (event->func) + event->func(device, event->priv, id, cur); + + list_del(&event->list); + kfree(event); + } +} + +/* kgsl_get_mem_entry - get the mem_entry structure for the specified object + * @ptbase - the pagetable base of the object + * @gpuaddr - the GPU address of the object + * @size - Size of the region to search + */ + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_process_private *priv; + struct kgsl_mem_entry *entry; + + mutex_lock(&kgsl_driver.process_mutex); + + list_for_each_entry(priv, &kgsl_driver.process_list, list) { + if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase)) + continue; + spin_lock(&priv->mem_lock); + entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); + + if (entry) { + spin_unlock(&priv->mem_lock); + mutex_unlock(&kgsl_driver.process_mutex); + return entry; + } + spin_unlock(&priv->mem_lock); + } + mutex_unlock(&kgsl_driver.process_mutex); + + return NULL; +} +EXPORT_SYMBOL(kgsl_get_mem_entry); static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) @@ -121,18 +206,32 @@ kgsl_mem_entry_destroy(struct kref *kref) struct kgsl_mem_entry *entry = container_of(kref, struct kgsl_mem_entry, refcount); - size_t size = entry->memdesc.size; + + if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) + kgsl_driver.stats.mapped -= entry->memdesc.size; + + /* + * Ion takes care of freeing the sglist for us (how nice ) so + * unmap the dma before freeing the sharedmem so kgsl_sharedmem_free + * doesn't try to free it again + */ + + if (entry->memtype == KGSL_MEM_ENTRY_ION) { + ion_unmap_dma(kgsl_ion_client, entry->priv_data); + entry->memdesc.sg = NULL; + } kgsl_sharedmem_free(&entry->memdesc); - if (entry->memtype == KGSL_USER_MEMORY) - entry->priv->stats.user -= size; - else if (entry->memtype == KGSL_MAPPED_MEMORY) { - if (entry->file_ptr) - fput(entry->file_ptr); - - kgsl_driver.stats.mapped -= size; - entry->priv->stats.mapped -= size; + switch (entry->memtype) { + case KGSL_MEM_ENTRY_PMEM: + case KGSL_MEM_ENTRY_ASHMEM: + if (entry->priv_data) + fput(entry->priv_data); + break; + case KGSL_MEM_ENTRY_ION: + ion_free(kgsl_ion_client, entry->priv_data); + break; } kfree(entry); @@ -150,6 +249,21 @@ void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, entry->priv = process; } +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; + entry->priv = NULL; + + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_put(entry); +} + /* Allocate a new context id */ static struct kgsl_context * @@ -170,8 +284,8 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } - ret = idr_get_new(&dev_priv->device->context_idr, - context, &id); + ret = idr_get_new_above(&dev_priv->device->context_idr, + context, 1, &id); if (ret != -EAGAIN) break; @@ -182,6 +296,16 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } + /* MAX - 1, there is one memdesc in memstore for device info */ + if (id >= KGSL_MEMSTORE_MAX) { + KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d " + "ctxts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + idr_remove(&dev_priv->device->context_idr, id); + kfree(context); + return NULL; + } + context->id = id; context->dev_priv = dev_priv; @@ -206,74 +330,34 @@ kgsl_destroy_context(struct kgsl_device_private 
*dev_priv, idr_remove(&dev_priv->device->context_idr, id); } -/* to be called when a process is destroyed, this walks the memqueue and - * frees any entryies that belong to the dying process - */ -static void kgsl_memqueue_cleanup(struct kgsl_device *device, - struct kgsl_process_private *private) -{ - struct kgsl_mem_entry *entry, *entry_tmp; - - if (!private) - return; - - BUG_ON(!mutex_is_locked(&device->mutex)); - - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (entry->priv == private) { - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - } -} - -static void kgsl_memqueue_freememontimestamp(struct kgsl_device *device, - struct kgsl_mem_entry *entry, - uint32_t timestamp, - enum kgsl_timestamp_type type) -{ - BUG_ON(!mutex_is_locked(&device->mutex)); - - entry->free_timestamp = timestamp; - - list_add_tail(&entry->list, &device->memqueue); -} - static void kgsl_timestamp_expired(struct work_struct *work) { struct kgsl_device *device = container_of(work, struct kgsl_device, ts_expired_ws); - struct kgsl_mem_entry *entry, *entry_tmp; struct kgsl_event *event, *event_tmp; uint32_t ts_processed; + unsigned int id; mutex_lock(&device->mutex); - /* get current EOP timestamp */ - ts_processed = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); - - /* Flush the freememontimestamp queue */ - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (timestamp_cmp(ts_processed, entry->free_timestamp) < 0) - break; - - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - /* Process expired events */ list_for_each_entry_safe(event, event_tmp, &device->events, list) { + ts_processed = device->ftbl->readtimestamp(device, + event->context, KGSL_TIMESTAMP_RETIRED); if (timestamp_cmp(ts_processed, event->timestamp) < 0) - break; + continue; + + id = event->context ? 
event->context->id : KGSL_MEMSTORE_GLOBAL; if (event->func) - event->func(device, event->priv, ts_processed); + event->func(device, event->priv, id, ts_processed); list_del(&event->list); kfree(event); } + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + mutex_unlock(&device->mutex); } @@ -348,11 +432,15 @@ int kgsl_unregister_ts_notifier(struct kgsl_device *device, } EXPORT_SYMBOL(kgsl_unregister_ts_notifier); -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp) +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) { unsigned int ts_processed; + unsigned int global; - ts_processed = device->ftbl->readtimestamp(device, + ts_processed = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + global = device->ftbl->readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED); return (timestamp_cmp(ts_processed, timestamp) >= 0); @@ -585,18 +673,13 @@ kgsl_put_process_private(struct kgsl_device *device, if (--private->refcnt) goto unlock; - KGSL_MEM_INFO(device, - "Memory usage: user (%d/%d) mapped (%d/%d)\n", - private->stats.user, private->stats.user_max, - private->stats.mapped, private->stats.mapped_max); - kgsl_process_uninit_sysfs(private); list_del(&private->list); list_for_each_entry_safe(entry, entry_tmp, &private->mem_list, list) { list_del(&entry->list); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } kgsl_mmu_putpagetable(private->pagetable); @@ -641,7 +724,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) /* clean up any to-be-freed entries that belong to this * process and this device */ - kgsl_memqueue_cleanup(device, private); + kgsl_cancel_events(device, dev_priv); mutex_unlock(&device->mutex); kfree(dev_priv); @@ -698,6 +781,9 @@ static int kgsl_open(struct inode *inodep, struct file *filep) kgsl_check_suspended(device); if (device->open_count == 0) { + kgsl_sharedmem_set(&device->memstore, 0, 0, + device->memstore.size); + result = device->ftbl->start(device, true); if (result) { @@ -758,9 +844,7 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, BUG_ON(private == NULL); list_for_each_entry(entry, &private->mem_list, list) { - if (gpuaddr >= entry->memdesc.gpuaddr && - ((gpuaddr + size) <= - (entry->memdesc.gpuaddr + entry->memdesc.size))) { + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { result = entry; break; } @@ -770,20 +854,6 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, } EXPORT_SYMBOL(kgsl_sharedmem_find_region); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size) -{ - BUG_ON(memdesc->hostptr == NULL); - - if (memdesc->gpuaddr == 0 || (gpuaddr < memdesc->gpuaddr || - gpuaddr >= memdesc->gpuaddr + memdesc->size)) - return NULL; - - *size = memdesc->size - (gpuaddr - memdesc->gpuaddr); - return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); -} -EXPORT_SYMBOL(kgsl_gpuaddr_to_vaddr); - /*call all ioctl sub functions with driver locked*/ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -810,6 +880,40 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, break; } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to 
contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_find_context(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + /* Clear reset status once its been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + break; + } default: result = dev_priv->device->ftbl->getproperty( dev_priv->device, param->type, @@ -820,21 +924,35 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, return result; } -static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) +static long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) { int result = 0; - struct kgsl_device_waittimestamp *param = data; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; - /* Set the active count so that suspend doesn't do the - wrong thing */ + if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + + return result; +} + +static long _device_waittimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int timeout) +{ + int result = 0; + + /* Set the active count so that suspend doesn't do the wrong thing */ dev_priv->device->active_cnt++; result = dev_priv->device->ftbl->waittimestamp(dev_priv->device, - param->timestamp, - param->timeout); + context, timestamp, timeout); + /* Fire off any pending suspend operations that are in flight */ @@ -844,39 +962,33 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private return result; } -static bool check_ibdesc(struct kgsl_device_private *dev_priv, - struct kgsl_ibdesc *ibdesc, unsigned int numibs, - bool parse) -{ - bool result = true; - unsigned int i; - for (i = 0; i < numibs; i++) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d\n", ibdesc[i].gpuaddr, - ibdesc[i].sizedwords); - result = false; - break; - } - if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d numibs %d/%d\n", - ibdesc[i].gpuaddr, - ibdesc[i].sizedwords, i+1, numibs); - result = false; - break; - } +static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp *param = data; + + return _device_waittimestamp(dev_priv, KGSL_MEMSTORE_GLOBAL, + param->timestamp, param->timeout); +} + +static long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + 
KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; } - return result; + + return _device_waittimestamp(dev_priv, context, + param->timestamp, param->timeout); } static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, @@ -895,7 +1007,7 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, if (context == NULL) { result = -EINVAL; KGSL_DRV_ERR(dev_priv->device, - "invalid drawctxt drawctxt_id %d\n", + "invalid context_id %d\n", param->drawctxt_id); goto done; } @@ -947,12 +1059,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, param->numibs = 1; } - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc"); - result = -EINVAL; - goto free_ibdesc; - } - result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, ibdesc, @@ -960,17 +1066,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, ¶m->timestamp, param->flags); - if (result != 0) - goto free_ibdesc; - - /* this is a check to try to detect if a command buffer was freed - * during issueibcmds(). - */ - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue"); - result = -EINVAL; - goto free_ibdesc; - } free_ibdesc: kfree(ibdesc); @@ -983,43 +1078,111 @@ done: return result; } +static long _cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, unsigned int type, + unsigned int *timestamp) +{ + *timestamp = dev_priv->device->ftbl->readtimestamp(dev_priv->device, + context, type); + + return 0; +} + static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { struct kgsl_cmdstream_readtimestamp *param = data; - param->timestamp = - dev_priv->device->ftbl->readtimestamp(dev_priv->device, - param->type); + return _cmdstream_readtimestamp(dev_priv, NULL, + param->type, ¶m->timestamp); +} - return 0; +static long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; + } + + return _cmdstream_readtimestamp(dev_priv, context, + param->type, ¶m->timestamp); +} + +static void kgsl_freemem_event_cb(struct kgsl_device *device, + void *priv, u32 id, u32 timestamp) +{ + struct kgsl_mem_entry *entry = priv; + spin_lock(&entry->priv->mem_lock); + list_del(&entry->list); + spin_unlock(&entry->priv->mem_lock); + kgsl_mem_entry_detach_process(entry); +} + +static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, + unsigned int gpuaddr, struct kgsl_context *context, + unsigned int timestamp, unsigned int type) +{ + int result = 0; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + unsigned int cur; + unsigned int context_id = context ? 
context->id : KGSL_MEMSTORE_GLOBAL; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find(dev_priv->process_priv, gpuaddr); + spin_unlock(&dev_priv->process_priv->mem_lock); + + if (entry) { + cur = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + + result = kgsl_add_event(dev_priv->device, context_id, + timestamp, kgsl_freemem_event_cb, + entry, dev_priv); + } else { + KGSL_DRV_ERR(dev_priv->device, + "invalid gpuaddr %08x\n", gpuaddr); + result = -EINVAL; + } + + return result; } static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - int result = 0; struct kgsl_cmdstream_freememontimestamp *param = data; - struct kgsl_mem_entry *entry = NULL; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr); - if (entry) - list_del(&entry->list); - spin_unlock(&dev_priv->process_priv->mem_lock); + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + NULL, param->timestamp, param->type); +} - if (entry) { - kgsl_memqueue_freememontimestamp(dev_priv->device, entry, - param->timestamp, param->type); - } else { +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { KGSL_DRV_ERR(dev_priv->device, - "invalid gpuaddr %08x\n", param->gpuaddr); - result = -EINVAL; + "invalid drawctxt context_id %d\n", param->context_id); + return -EINVAL; } - return result; + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + context, param->timestamp, param->type); } static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, @@ -1089,7 +1252,7 @@ static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, spin_unlock(&private->mem_lock); if (entry) { - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } else { KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr); result = -EINVAL; @@ -1169,7 +1332,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, goto error; } - result = kgsl_sharedmem_vmalloc_user(&entry->memdesc, + result = kgsl_sharedmem_page_alloc_user(&entry->memdesc, private->pagetable, len, param->flags); if (result != 0) @@ -1177,26 +1340,25 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - result = remap_vmalloc_range(vma, (void *) entry->memdesc.hostptr, 0); + result = kgsl_sharedmem_map_vma(vma, &entry->memdesc); if (result) { - KGSL_CORE_ERR("remap_vmalloc_range failed: %d\n", result); - goto error_free_vmalloc; + KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result); + goto error_free_alloc; } param->gpuaddr = entry->memdesc.gpuaddr; - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); /* Process specific statistics */ - KGSL_STATS_ADD(len, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, len); kgsl_check_idle(dev_priv->device); return 0; -error_free_vmalloc: +error_free_alloc: kgsl_sharedmem_free(&entry->memdesc); error_free_entry: @@ -1292,7 +1454,7 @@ static int kgsl_setup_phys_file(struct kgsl_mem_entry *entry, } - entry->file_ptr = filep; + 
entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = size; @@ -1319,8 +1481,8 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; unsigned long paddr = (unsigned long) addr; - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), - GFP_KERNEL); + memdesc->sg = kgsl_sg_alloc(sglen); + if (memdesc->sg == NULL) return -ENOMEM; @@ -1360,7 +1522,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, err: spin_unlock(¤t->mm->page_table_lock); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, sglen); memdesc->sg = NULL; return -EINVAL; @@ -1464,7 +1626,7 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, goto err; } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = ALIGN(size, PAGE_SIZE); entry->memdesc.hostptr = hostptr; @@ -1488,6 +1650,51 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, } #endif +static int kgsl_setup_ion(struct kgsl_mem_entry *entry, + struct kgsl_pagetable *pagetable, int fd) +{ + struct ion_handle *handle; + struct scatterlist *s; + unsigned long flags; + + if (kgsl_ion_client == NULL) { + kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME); + if (kgsl_ion_client == NULL) + return -ENODEV; + } + + handle = ion_import_fd(kgsl_ion_client, fd); + if (IS_ERR_OR_NULL(handle)) + return PTR_ERR(handle); + + entry->memtype = KGSL_MEM_ENTRY_ION; + entry->priv_data = handle; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + + if (ion_handle_get_flags(kgsl_ion_client, handle, &flags)) + goto err; + + entry->memdesc.sg = ion_map_dma(kgsl_ion_client, handle, flags); + + if (IS_ERR_OR_NULL(entry->memdesc.sg)) + goto err; + + /* Calculate the size of the memdesc from the sglist */ + + entry->memdesc.sglen = 0; + + for (s = entry->memdesc.sg; s != NULL; s = sg_next(s)) { + entry->memdesc.size += s->length; + entry->memdesc.sglen++; + } + + return 0; +err: + ion_free(kgsl_ion_client, handle); + return -ENOMEM; +} + static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -1515,6 +1722,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_phys_file(entry, private->pagetable, param->fd, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_PMEM; break; case KGSL_USER_MEM_TYPE_ADDR: @@ -1531,6 +1739,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_hostptr(entry, private->pagetable, (void *) param->hostptr, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_USER; break; case KGSL_USER_MEM_TYPE_ASHMEM: @@ -1547,6 +1756,12 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_ashmem(entry, private->pagetable, param->fd, (void *) param->hostptr, param->len); + + entry->memtype = KGSL_MEM_ENTRY_ASHMEM; + break; + case KGSL_USER_MEM_TYPE_ION: + result = kgsl_setup_ion(entry, private->pagetable, + param->fd); break; default: KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); @@ -1566,14 +1781,10 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, /* Adjust the returned value for a non 4k aligned offset */ param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK); - entry->memtype = KGSL_MAPPED_MEMORY; - KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped, kgsl_driver.stats.mapped_max); - /* Statistics */ - KGSL_STATS_ADD(param->len, 
private->stats.mapped, - private->stats.mapped_max); + kgsl_process_add_stats(private, entry->memtype, param->len); kgsl_mem_entry_attach_process(entry, private); @@ -1581,8 +1792,8 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, return result; error_put_file_ptr: - if (entry->file_ptr) - fput(entry->file_ptr); + if (entry->priv_data) + fput(entry->priv_data); error: kfree(entry); @@ -1608,10 +1819,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, result = -EINVAL; goto done; } - if (!entry->memdesc.hostptr) - entry->memdesc.hostptr = - kgsl_gpuaddr_to_vaddr(&entry->memdesc, - param->gpuaddr, &entry->memdesc.size); if (!entry->memdesc.hostptr) { KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n", @@ -1620,9 +1827,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, } kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN); - - /* Statistics - keep track of how many flushes each process does */ - private->stats.flushes++; done: spin_unlock(&private->mem_lock); return result; @@ -1645,12 +1849,11 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, param->size, param->flags); if (result == 0) { - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); param->gpuaddr = entry->memdesc.gpuaddr; - KGSL_STATS_ADD(entry->memdesc.size, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, param->size); } else kfree(entry); @@ -1698,13 +1901,14 @@ struct kgsl_genlock_event_priv { * kgsl_genlock_event_cb - Event callback for a genlock timestamp event * @device - The KGSL device that expired the timestamp * @priv - private data for the event + * @context_id - the context id that goes with the timestamp * @timestamp - the timestamp that triggered the event * * Release a genlock lock following the expiration of a timestamp */ static void kgsl_genlock_event_cb(struct kgsl_device *device, - void *priv, u32 timestamp) + void *priv, u32 context_id, u32 timestamp) { struct kgsl_genlock_event_priv *ev = priv; int ret; @@ -1724,6 +1928,7 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, * @timestamp - Timestamp to trigger the event * @data - User space buffer containing struct kgsl_genlock_event_priv * @len - length of the userspace buffer + * @owner - driver instance that owns this event * @returns 0 on success or error code on error * * Attack to a genlock handle and register an event to release the @@ -1731,7 +1936,8 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, */ static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { struct kgsl_genlock_event_priv *event; struct kgsl_timestamp_event_genlock priv; @@ -1756,7 +1962,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, return ret; } - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + ret = kgsl_add_event(device, context_id, timestamp, + kgsl_genlock_event_cb, event, owner); if (ret) kfree(event); @@ -1764,7 +1971,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, } #else static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { return -EINVAL; } @@ -1787,7 +1995,8 @@ static 
long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, switch (param->type) { case KGSL_TIMESTAMP_EVENT_GENLOCK: ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len); + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); break; default: ret = -EINVAL; @@ -1811,12 +2020,18 @@ static const struct { kgsl_ioctl_device_getproperty, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP, kgsl_ioctl_device_waittimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, kgsl_ioctl_rb_issueibcmds, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP, kgsl_ioctl_cmdstream_readtimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP, kgsl_ioctl_cmdstream_freememontimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, kgsl_ioctl_drawctxt_create, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, @@ -1839,6 +2054,8 @@ static const struct { kgsl_ioctl_cff_user_event, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, kgsl_ioctl_timestamp_event, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -1892,7 +2109,7 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (!func) { KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - ret = -EINVAL; + ret = -ENOIOCTLCMD; goto done; } lock = 1; @@ -2142,16 +2359,15 @@ kgsl_register_device(struct kgsl_device *device) INIT_WORK(&device->idle_check_ws, kgsl_idle_check); INIT_WORK(&device->ts_expired_ws, kgsl_timestamp_expired); - INIT_LIST_HEAD(&device->memqueue); INIT_LIST_HEAD(&device->events); + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + ret = kgsl_mmu_init(device); if (ret != 0) goto err_dest_work_q; - ret = kgsl_allocate_contiguous(&device->memstore, - sizeof(struct kgsl_devmemstore)); - + ret = kgsl_allocate_contiguous(&device->memstore, KGSL_MEMSTORE_SIZE); if (ret != 0) goto err_close_mmu; diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h old mode 100644 new mode 100755 index e26cdc9e..fc45ff7c --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -21,9 +21,21 @@ #include #include #include +#include #define KGSL_NAME "kgsl" +/* The number of memstore arrays limits the number of contexts allowed. 
+ * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1) + +/* Timestamp window used to detect rollovers */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + /*cache coherency ops */ #define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002 @@ -91,6 +103,8 @@ struct kgsl_driver { struct { unsigned int vmalloc; unsigned int vmalloc_max; + unsigned int page_alloc; + unsigned int page_alloc_max; unsigned int coherent; unsigned int coherent_max; unsigned int mapped; @@ -101,8 +115,41 @@ struct kgsl_driver { extern struct kgsl_driver kgsl_driver; -#define KGSL_USER_MEMORY 1 -#define KGSL_MAPPED_MEMORY 2 +struct kgsl_pagetable; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + int (*vmflags)(struct kgsl_memdesc *); + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel_mem)(struct kgsl_memdesc *); +}; + +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) + +/* shared memory allocation */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int gpuaddr; + unsigned int physaddr; + unsigned int size; + unsigned int priv; + struct scatterlist *sg; + unsigned int sglen; + struct kgsl_memdesc_ops *ops; + int flags; +}; + +/* List of different memory entry types */ + +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_PMEM 1 +#define KGSL_MEM_ENTRY_ASHMEM 2 +#define KGSL_MEM_ENTRY_USER 3 +#define KGSL_MEM_ENTRY_ION 4 +#define KGSL_MEM_ENTRY_MAX 5 struct kgsl_pagetable; struct kgsl_memdesc_ops; @@ -124,9 +171,10 @@ struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; int memtype; - struct file *file_ptr; + void *priv_data; struct list_head list; uint32_t free_timestamp; + unsigned int context_id; /* back pointer to private structure under whose context this * allocation is made */ struct kgsl_process_private *priv; @@ -139,8 +187,10 @@ struct kgsl_mem_entry { #endif void kgsl_mem_entry_destroy(struct kref *kref); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size); + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size); + struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); @@ -169,14 +219,26 @@ static inline void kgsl_drm_exit(void) #endif static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr) + unsigned int gpuaddr, unsigned int size) { - if (gpuaddr >= memdesc->gpuaddr && (gpuaddr + sizeof(unsigned int)) <= - (memdesc->gpuaddr + memdesc->size)) { + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { return 1; } return 0; } +static inline uint8_t *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + unsigned int gpuaddr) +{ + if (memdesc->gpuaddr == 0 || + gpuaddr < memdesc->gpuaddr || + gpuaddr >= (memdesc->gpuaddr + memdesc->size) || + (NULL == memdesc->hostptr && memdesc->ops->map_kernel_mem && + memdesc->ops->map_kernel_mem(memdesc))) + return NULL; + + return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); +} static inline int timestamp_cmp(unsigned int new, unsigned int old) { @@ -185,7 +247,7 @@ static inline int timestamp_cmp(unsigned int new, unsigned int old) 
if (ts_diff == 0) return 0; - return ((ts_diff > 0) || (ts_diff < -20000)) ? 1 : -1; + return ((ts_diff > 0) || (ts_diff < -KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; } static inline void diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c old mode 100644 new mode 100755 index aa33152c..1ab8908f --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -20,7 +20,7 @@ #include #include #include -#include +//#include #include "kgsl.h" #include "kgsl_cffdump.h" @@ -231,8 +231,6 @@ static void cffdump_printline(int id, uint opcode, uint op1, uint op2, spin_lock(&cffdump_lock); if (opcode == CFF_OP_WRITE_MEM) { - if (op1 < 0x40000000 || op1 >= 0x60000000) - KGSL_CORE_ERR("addr out-of-range: op1=%08x", op1); if ((cff_op_write_membuf.addr != op1 && cff_op_write_membuf.count) || (cff_op_write_membuf.count == MEMBUF_SIZE)) @@ -360,15 +358,7 @@ void kgsl_cffdump_destroy() void kgsl_cffdump_open(enum kgsl_deviceid device_id) { - /*TODO: move this to where we can report correct gmemsize*/ - unsigned int va_base; - - if (cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930()) - va_base = 0x40000000; - else - va_base = 0x20000000; - - kgsl_cffdump_memory_base(device_id, va_base, + kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE, CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, SZ_256K); } @@ -401,8 +391,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, bool clean_cache) { const void *src; - uint host_size; - uint physaddr; if (!kgsl_cff_dump_enable) return; @@ -422,13 +410,9 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, } memdesc = &entry->memdesc; } - BUG_ON(memdesc->gpuaddr == 0); - BUG_ON(gpuaddr == 0); - physaddr = kgsl_get_realaddr(memdesc) + (gpuaddr - memdesc->gpuaddr); - - src = kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (src == NULL || host_size < sizebytes) { - KGSL_CORE_ERR("did not find mapping for " + src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); + if (memdesc->hostptr == NULL) { + KGSL_CORE_ERR("no kernel mapping for " "gpuaddr: 0x%08x, m->host: 0x%p, phys: 0x%08x\n", gpuaddr, memdesc->hostptr, memdesc->physaddr); return; @@ -444,7 +428,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, KGSL_CACHE_OP_INV); } - BUG_ON(physaddr > 0x66000000 && physaddr < 0x66ffffff); while (sizebytes > 3) { cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src, 0, 0, 0); @@ -462,7 +445,6 @@ void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes) if (!kgsl_cff_dump_enable) return; - BUG_ON(addr > 0x66000000 && addr < 0x66ffffff); while (sizebytes > 3) { /* Use 32bit memory writes as long as there's at least * 4 bytes left */ @@ -515,197 +497,6 @@ int kgsl_cffdump_waitirq(void) } EXPORT_SYMBOL(kgsl_cffdump_waitirq); -#define ADDRESS_STACK_SIZE 256 -#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF) -static unsigned int kgsl_cffdump_addr_count; - -static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv, - uint *hostaddr, bool check_only) -{ - static uint addr_stack[ADDRESS_STACK_SIZE]; - static uint size_stack[ADDRESS_STACK_SIZE]; - - switch (GET_PM4_TYPE3_OPCODE(hostaddr)) { - case CP_INDIRECT_BUFFER_PFD: - case CP_INDIRECT_BUFFER: - { - /* traverse indirect buffers */ - int i; - uint ibaddr = hostaddr[1]; - uint ibsize = hostaddr[2]; - - /* is this address already in encountered? 
*/ - for (i = 0; - i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr; - ++i) - ; - - if (kgsl_cffdump_addr_count == i) { - addr_stack[kgsl_cffdump_addr_count] = ibaddr; - size_stack[kgsl_cffdump_addr_count++] = ibsize; - - if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) { - KGSL_CORE_ERR("stack overflow\n"); - return false; - } - - return kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibaddr, ibsize, check_only); - } else if (size_stack[i] != ibsize) { - KGSL_CORE_ERR("gpuaddr: 0x%08x, " - "wc: %u, with size wc: %u already on the " - "stack\n", ibaddr, ibsize, size_stack[i]); - return false; - } - } - break; - } - - return true; -} - -/* - * Traverse IBs and dump them to test vector. Detect swap by inspecting - * register writes, keeping note of the current state, and dump - * framebuffer config to test vector - */ -bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, - const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords, - bool check_only) -{ - static uint level; /* recursion level */ - bool ret = true; - uint host_size; - uint *hostaddr, *hoststart; - int dwords_left = sizedwords; /* dwords left in the current command - buffer */ - - if (level == 0) - kgsl_cffdump_addr_count = 0; - - if (memdesc == NULL) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - gpuaddr, sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_CORE_ERR("did not find mapping " - "for gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - memdesc = &entry->memdesc; - } - - hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (hostaddr == NULL) { - KGSL_CORE_ERR("did not find mapping for " - "gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - - hoststart = hostaddr; - - level++; - - if (!memdesc->physaddr) { - KGSL_CORE_ERR("no physaddr"); - } else { - mb(); - kgsl_cache_range_op((struct kgsl_memdesc *)memdesc, - KGSL_CACHE_OP_INV); - } - -#ifdef DEBUG - pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", - gpuaddr, sizedwords, hostaddr); -#endif - - while (dwords_left > 0) { - int count = 0; /* dword count including packet header */ - bool cur_ret = true; - - switch (*hostaddr >> 30) { - case 0x0: /* type-0 */ - count = (*hostaddr >> 16)+2; - break; - case 0x1: /* type-1 */ - count = 2; - break; - case 0x3: /* type-3 */ - count = ((*hostaddr >> 16) & 0x3fff) + 2; - cur_ret = kgsl_cffdump_handle_type3(dev_priv, - hostaddr, check_only); - break; - default: - pr_warn("kgsl: cffdump: parse-ib: unexpected type: " - "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", - *hostaddr >> 30, *hostaddr, hostaddr, - gpuaddr+4*(sizedwords-dwords_left)); - cur_ret = false; - count = dwords_left; - break; - } - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x" - " @ 0x%p[gb:0x%08x], level:%d\n", - sizedwords-dwords_left, sizedwords, *hostaddr, - hostaddr, gpuaddr+4*(sizedwords-dwords_left), - level); - - print_hex_dump(KERN_ERR, level == 1 ? 
"IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - ret = ret && cur_ret; - - /* jump to next packet */ - dwords_left -= count; - hostaddr += count; - cur_ret = dwords_left >= 0; - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, " - "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", - count, sizedwords-(dwords_left+count), - sizedwords, *(hostaddr-count), hostaddr-count, - gpuaddr+4*(sizedwords-(dwords_left+count)), - level); - - print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - - ret = ret && cur_ret; - } - - if (!ret) - pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, " - "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); - - if (!check_only) { -#ifdef DEBUG - uint offset = gpuaddr - memdesc->gpuaddr; - pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, " - "physaddr:%08x, offset:%d, size:%d", hoststart, - gpuaddr, memdesc->physaddr + offset, offset, - sizedwords*4); -#endif - kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4, - false); - } - - level--; - - return ret; -} - static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, uint prev_padding) { diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h old mode 100644 new mode 100755 index 64d369eb..45e41d91 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -47,6 +46,7 @@ #define KGSL_STATE_SUSPEND 0x00000010 #define KGSL_STATE_HUNG 0x00000020 #define KGSL_STATE_DUMP_AND_RECOVER 0x00000040 +#define KGSL_STATE_SLUMBER 0x00000080 #define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000 @@ -76,9 +76,10 @@ struct kgsl_functable { enum kgsl_property_type type, void *value, unsigned int sizebytes); int (*waittimestamp) (struct kgsl_device *device, - unsigned int timestamp, unsigned int msecs); + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); unsigned int (*readtimestamp) (struct kgsl_device *device, - enum kgsl_timestamp_type type); + struct kgsl_context *context, enum kgsl_timestamp_type type); int (*issueibcmds) (struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, unsigned int sizedwords, uint32_t *timestamp, @@ -101,6 +102,9 @@ struct kgsl_functable { struct kgsl_context *context); long (*ioctl) (struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); + int (*setproperty) (struct kgsl_device *device, + enum kgsl_property_type type, void *value, + unsigned int sizebytes); }; struct kgsl_memregion { @@ -120,10 +124,12 @@ struct kgsl_mh { }; struct kgsl_event { + struct kgsl_context *context; uint32_t timestamp; - void (*func)(struct kgsl_device *, void *, u32); + void (*func)(struct kgsl_device *, void *, u32, u32); void *priv; struct list_head list; + struct kgsl_device_private *owner; }; @@ -152,7 +158,7 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; - struct list_head memqueue; + unsigned int last_expired_ctxt_id; unsigned int active_cnt; struct completion suspend_gate; @@ -185,6 +191,11 @@ struct kgsl_context { /* Pointer to 
the device specific context information */ void *devctxt; + /* + * Status indicating whether a gpu reset occurred and whether this + * context was responsible for causing it + */ + unsigned int reset_status; }; struct kgsl_process_private { @@ -194,15 +205,12 @@ struct kgsl_process_private { struct list_head mem_list; struct kgsl_pagetable *pagetable; struct list_head list; - struct kobject *kobj; + struct kobject kobj; struct { - unsigned int user; - unsigned int user_max; - unsigned int mapped; - unsigned int mapped_max; - unsigned int flushes; - } stats; + unsigned int cur; + unsigned int max; + } stats[KGSL_MEM_ENTRY_MAX]; }; struct kgsl_device_private { @@ -217,6 +225,14 @@ struct kgsl_power_stats { struct kgsl_device *kgsl_get_device(int dev_idx); +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, size_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + static inline void kgsl_regread(struct kgsl_device *device, unsigned int offsetwords, unsigned int *value) @@ -294,7 +310,8 @@ kgsl_find_context(struct kgsl_device_private *dev_priv, uint32_t id) return (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL; } -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp); +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); int kgsl_register_ts_notifier(struct kgsl_device *device, struct notifier_block *nb); diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c old mode 100644 new mode 100755 index cdf9dc4e..66ac08f6 --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,7 +17,6 @@ #include "drmP.h" #include "drm.h" #include -#include #include "kgsl.h" #include "kgsl_device.h" @@ -39,6 +38,9 @@ #define ENTRY_EMPTY -1 #define ENTRY_NEEDS_CLEANUP -2 +#define DRM_KGSL_NOT_INITED -1 +#define DRM_KGSL_INITED 1 + #define DRM_KGSL_NUM_FENCE_ENTRIES (DRM_KGSL_HANDLE_WAIT_ENTRIES << 2) #define DRM_KGSL_HANDLE_WAIT_ENTRIES 5 @@ -127,6 +129,8 @@ struct drm_kgsl_gem_object { struct list_head wait_list; }; +static int kgsl_drm_inited = DRM_KGSL_NOT_INITED; + /* This is a global list of all the memory currently mapped in the MMU */ static struct list_head kgsl_mem_list; @@ -152,22 +156,6 @@ static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op) kgsl_cache_range_op(memdesc, cacheop); } -/* Flush all the memory mapped in the MMU */ - -void kgsl_gpu_mem_flush(int op) -{ - struct drm_kgsl_gem_object *entry; - - list_for_each_entry(entry, &kgsl_mem_list, list) { - kgsl_gem_mem_flush(&entry->memdesc, entry->type, op); - } - - /* Takes care of WT/WC case. 
- * More useful when we go barrierless - */ - dmb(); -} - /* TODO: * Add vsync wait */ @@ -186,41 +174,6 @@ struct kgsl_drm_device_priv { struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX]; }; -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param); - -static struct notifier_block kgsl_ts_nb[KGSL_DEVICE_MAX]; - -static int kgsl_drm_firstopen(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - - if (device == NULL) - continue; - - kgsl_ts_nb[i].notifier_call = kgsl_ts_notifier_cb; - kgsl_register_ts_notifier(device, &kgsl_ts_nb[i]); - } - - return 0; -} - -void kgsl_drm_lastclose(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - if (device == NULL) - continue; - - kgsl_unregister_ts_notifier(device, &kgsl_ts_nb[i]); - } -} - void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv) { } @@ -268,80 +221,71 @@ kgsl_gem_alloc_memory(struct drm_gem_object *obj) { struct drm_kgsl_gem_object *priv = obj->driver_private; int index; + int result = 0; /* Return if the memory is already allocated */ if (kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type)) return 0; + if (priv->pagetable == NULL) { + priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + if (priv->pagetable == NULL) { + DRM_ERROR("Unable to get the GPU MMU pagetable\n"); + return -EINVAL; + } + } + if (TYPE_IS_PMEM(priv->type)) { int type; if (priv->type == DRM_KGSL_GEM_TYPE_EBI || - priv->type & DRM_KGSL_GEM_PMEM_EBI) - type = PMEM_MEMTYPE_EBI1; - else - type = PMEM_MEMTYPE_SMI; - - priv->memdesc.physaddr = - pmem_kalloc(obj->size * priv->bufcount, - type | PMEM_ALIGNMENT_4K); - - if (IS_ERR((void *) priv->memdesc.physaddr)) { - DRM_ERROR("Unable to allocate PMEM memory\n"); - return -ENOMEM; + priv->type & DRM_KGSL_GEM_PMEM_EBI) { + type = PMEM_MEMTYPE_EBI1; + result = kgsl_sharedmem_ebimem_user( + &priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, + 0); + if (result) { + DRM_ERROR( + "Unable to allocate PMEM memory\n"); + return result; + } } - - priv->memdesc.size = obj->size * priv->bufcount; + else + return -EINVAL; } else if (TYPE_IS_MEM(priv->type)) { - priv->memdesc.hostptr = - vmalloc_user(obj->size * priv->bufcount); - if (priv->memdesc.hostptr == NULL) { - DRM_ERROR("Unable to allocate vmalloc memory\n"); - return -ENOMEM; + if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || + priv->type & DRM_KGSL_GEM_CACHE_MASK) + list_add(&priv->list, &kgsl_mem_list); + + result = kgsl_sharedmem_page_alloc_user(&priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, 0); + + if (result != 0) { + DRM_ERROR( + "Unable to allocate Vmalloc user memory\n"); + return result; } - - priv->memdesc.size = obj->size * priv->bufcount; - priv->memdesc.ops = &kgsl_vmalloc_ops; } else return -EINVAL; - for (index = 0; index < priv->bufcount; index++) + for (index = 0; index < priv->bufcount; index++) { priv->bufs[index].offset = index * obj->size; - + priv->bufs[index].gpuaddr = + priv->memdesc.gpuaddr + + priv->bufs[index].offset; + } + priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - - if (!priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return; - - kgsl_mmu_unmap(priv->pagetable, &priv->memdesc); - - kgsl_mmu_putpagetable(priv->pagetable); - priv->pagetable 
= NULL; - - if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || - (priv->type & DRM_KGSL_GEM_CACHE_MASK)) - list_del(&priv->list); - - priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; -} -#else -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ -} -#endif - static void kgsl_gem_free_memory(struct drm_gem_object *obj) { @@ -353,12 +297,17 @@ kgsl_gem_free_memory(struct drm_gem_object *obj) kgsl_gem_mem_flush(&priv->memdesc, priv->type, DRM_KGSL_GEM_CACHE_OP_FROM_DEV); - kgsl_gem_unmap(obj); - - if (TYPE_IS_PMEM(priv->type)) - pmem_kfree(priv->memdesc.physaddr); - kgsl_sharedmem_free(&priv->memdesc); + + kgsl_mmu_putpagetable(priv->pagetable); + priv->pagetable = NULL; + + if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || + (priv->type & DRM_KGSL_GEM_CACHE_MASK)) + list_del(&priv->list); + + priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; + } int @@ -454,7 +403,7 @@ kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, filp = fget(drm_fd); if (unlikely(filp == NULL)) { - DRM_ERROR("Unable to ghet the DRM file descriptor\n"); + DRM_ERROR("Unable to get the DRM file descriptor\n"); return -EINVAL; } file_priv = filp->private_data; @@ -527,7 +476,7 @@ kgsl_gem_init_obj(struct drm_device *dev, ret = drm_gem_handle_create(file_priv, obj, handle); - drm_gem_object_handle_unreference(obj); + drm_gem_object_unreference(obj); INIT_LIST_HEAD(&priv->wait_list); for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) { @@ -702,128 +651,14 @@ int kgsl_gem_unbind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (--priv->bound == 0) - kgsl_gem_unmap(obj); - - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - int ret = -EINVAL; - - if (priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return 0; - - /* Get the global page table */ - - if (priv->pagetable == NULL) { - priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); - - if (priv->pagetable == NULL) { - DRM_ERROR("Unable to get the GPU MMU pagetable\n"); - return -EINVAL; - } - } - - priv->memdesc.pagetable = priv->pagetable; - - ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - - if (!ret) { - for (index = 0; index < priv->bufcount; index++) { - priv->bufs[index].gpuaddr = - priv->memdesc.gpuaddr + - priv->bufs[index].offset; - } - } - - /* Add cached memory to the list to be cached */ - - if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || - priv->type & DRM_KGSL_GEM_CACHE_MASK) - list_add(&priv->list, &kgsl_mem_list); - - priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; - - return ret; -} -#else -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - - if (TYPE_IS_PMEM(priv->type)) { - for (index = 0; index < priv->bufcount; index++) - priv->bufs[index].gpuaddr = - priv->memdesc.physaddr + priv->bufs[index].offset; - - return 0; - } - - return -EINVAL; -} -#endif - int kgsl_gem_bind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct 
drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - int ret = 0; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (priv->bound++ == 0) { - - if (!kgsl_gem_memory_allocated(obj)) { - DRM_ERROR("Memory not allocated for this object\n"); - ret = -ENOMEM; - goto out; - } - - ret = kgsl_gem_map(obj); - - /* This is legacy behavior - use GET_BUFFERINFO instead */ - args->gpuptr = priv->bufs[0].gpuaddr; - } -out: - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; + return 0; } /* Allocate the memory and prepare it for CPU mapping */ @@ -1068,17 +903,18 @@ int kgsl_gem_kmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; struct drm_kgsl_gem_object *priv; - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; mutex_lock(&dev->struct_mutex); priv = obj->driver_private; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) priv->memdesc.hostptr + offset; + i = offset >> PAGE_SHIFT; + page = sg_page(&(priv->memdesc.sg[i])); - page = vmalloc_to_page((void *) pg); if (!page) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; @@ -1370,27 +1206,6 @@ wakeup_fence_entries(struct drm_kgsl_gem_object_fence *fence) fence->fence_id = ENTRY_NEEDS_CLEANUP; /* Mark it as needing cleanup */ } -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param) -{ - struct drm_kgsl_gem_object_fence *fence; - struct kgsl_device *device = kgsl_get_device(code); - int i; - - /* loop through the fences to see what things can be processed */ - - for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) { - fence = &gem_buf_fence[i]; - if (!fence->ts_valid || fence->ts_device != code) - continue; - - if (kgsl_check_timestamp(device, fence->timestamp)) - wakeup_fence_entries(fence); - } - - return 0; -} - int kgsl_gem_lock_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -1583,7 +1398,7 @@ kgsl_gem_unlock_on_ts_ioctl(struct drm_device *dev, void *data, } device = kgsl_get_device(ts_device); - ts_done = kgsl_check_timestamp(device, args->timestamp); + ts_done = kgsl_check_timestamp(device, NULL, args->timestamp); mutex_lock(&dev->struct_mutex); @@ -1634,11 +1449,9 @@ struct drm_ioctl_desc kgsl_drm_ioctls[] = { }; static struct drm_driver driver = { - .driver_features = DRIVER_USE_PLATFORM_DEVICE | DRIVER_GEM, + .driver_features = DRIVER_GEM, .load = kgsl_drm_load, .unload = kgsl_drm_unload, - .firstopen = kgsl_drm_firstopen, - .lastclose = kgsl_drm_lastclose, .preclose = kgsl_drm_preclose, .suspend = kgsl_drm_suspend, .resume = kgsl_drm_resume, @@ -1669,8 +1482,13 @@ int kgsl_drm_init(struct platform_device *dev) { int i; + /* Only initialize once */ + if (kgsl_drm_inited == DRM_KGSL_INITED) + return 0; + + kgsl_drm_inited = DRM_KGSL_INITED; + driver.num_ioctls = DRM_ARRAY_SIZE(kgsl_drm_ioctls); - driver.platform_device = dev; INIT_LIST_HEAD(&kgsl_mem_list); @@ -1680,10 +1498,11 @@ int kgsl_drm_init(struct platform_device *dev) gem_buf_fence[i].fence_id = ENTRY_EMPTY; } - return drm_init(&driver); + return drm_platform_init(&driver, dev); } void kgsl_drm_exit(void) { - drm_exit(&driver); + kgsl_drm_inited = DRM_KGSL_NOT_INITED; + drm_platform_exit(&driver, driver.kdriver.platform_device); } diff 
--git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c index 9e7ef61d..5d326658 100644 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -356,8 +356,8 @@ err_ptpool_remove: int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct kgsl_gpummu_pt *gpummu_pt = pt->priv; - return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); + struct kgsl_gpummu_pt *gpummu_pt = pt ? pt->priv : NULL; + return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); } void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) @@ -385,14 +385,16 @@ kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - writel_relaxed(val, &baseptr[pte]); + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + baseptr[pte] = val; } static inline uint32_t kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + return baseptr[pte] & GSL_PT_PAGE_ADDR_MASK; } static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, @@ -683,7 +685,7 @@ kgsl_gpummu_map(void *mmu_specific_pt, flushtlb = 1; for_each_sg(memdesc->sg, s, memdesc->sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); unsigned int j; /* Each sg entry might be multiple pages long */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index 30365a3c..760cdb03 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -34,8 +34,8 @@ struct kgsl_iommu { static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct iommu_domain *domain = pt->priv; - return pt && pt_base && ((unsigned int)domain == pt_base); + struct iommu_domain *domain = pt ? pt->priv : NULL; + return domain && pt_base && ((unsigned int)domain == pt_base); } static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c old mode 100644 new mode 100755 index 7916ecb0..d7585ef9 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -23,6 +22,7 @@ #include "kgsl_mmu.h" #include "kgsl_device.h" #include "kgsl_sharedmem.h" +#include "adreno_postmortem.h" #define KGSL_MMU_ALIGN_SHIFT 13 #define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) @@ -592,6 +592,12 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, memdesc->size); + /* + * Don't clear the gpuaddr on global mappings because they + * may be in use by other pagetables + */ + if (!(memdesc->priv & KGSL_MEMFLAGS_GLOBAL)) + memdesc->gpuaddr = 0; return 0; } EXPORT_SYMBOL(kgsl_mmu_unmap); @@ -623,6 +629,7 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, gpuaddr, memdesc->gpuaddr); goto error_unmap; } + memdesc->priv |= KGSL_MEMFLAGS_GLOBAL; return result; error_unmap: kgsl_mmu_unmap(pagetable, memdesc); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c old mode 100644 new mode 100755 index a587c44a..b52fc1d5 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,9 +10,14 @@ * GNU General Public License for more details. * */ + +#include #include #include #include +#include +#include +#include #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -21,6 +25,59 @@ #include "kgsl_device.h" #include "adreno_ringbuffer.h" +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. 
This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; + +/** + * Given a kobj, find the process structure attached to it + */ + static struct kgsl_process_private * _get_priv_from_kobj(struct kobject *kobj) { @@ -41,87 +98,109 @@ _get_priv_from_kobj(struct kobject *kobj) return NULL; } -/* sharedmem / memory sysfs files */ +/** + * Show the current amount of memory allocated for the given memtype + */ static ssize_t -process_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) { + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].max); +} + + +static void mem_entry_sysfs_release(struct kobject *kobj) +{ +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); struct kgsl_process_private *priv; - unsigned int val = 0; + ssize_t ret; mutex_lock(&kgsl_driver.process_mutex); priv = _get_priv_from_kobj(kobj); - if (priv == NULL) { - mutex_unlock(&kgsl_driver.process_mutex); - return 0; - } - - if (!strncmp(attr->attr.name, "user", 4)) - val = priv->stats.user; - if (!strncmp(attr->attr.name, "user_max", 8)) - val = priv->stats.user_max; - if (!strncmp(attr->attr.name, "mapped", 6)) - val = priv->stats.mapped; - if (!strncmp(attr->attr.name, "mapped_max", 10)) - val = priv->stats.mapped_max; - if (!strncmp(attr->attr.name, "flushes", 7)) - val = priv->stats.flushes; + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; mutex_unlock(&kgsl_driver.process_mutex); - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return ret; } -#define KGSL_MEMSTAT_ATTR(_name, _show) \ - static struct kobj_attribute attr_##_name = \ - __ATTR(_name, 0444, _show, NULL) - -KGSL_MEMSTAT_ATTR(user, process_show); -KGSL_MEMSTAT_ATTR(user_max, process_show); -KGSL_MEMSTAT_ATTR(mapped, process_show); -KGSL_MEMSTAT_ATTR(mapped_max, process_show); -KGSL_MEMSTAT_ATTR(flushes, process_show); - -static struct attribute *process_attrs[] = { - &attr_user.attr, - &attr_user_max.attr, - &attr_mapped.attr, - &attr_mapped_max.attr, - &attr_flushes.attr, - NULL +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, }; -static struct attribute_group process_attr_group = { - .attrs = process_attrs, +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, + .default_attrs = NULL, + .release = mem_entry_sysfs_release +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), +#ifdef CONFIG_ANDROID_PMEM + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem), +#endif +#ifdef CONFIG_ASHMEM 
+ MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem), +#endif + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), +#endif }; void kgsl_process_uninit_sysfs(struct kgsl_process_private *private) { - /* Remove the sysfs entry */ - if (private->kobj) { - sysfs_remove_group(private->kobj, &process_attr_group); - kobject_put(private->kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); } + + kobject_put(&private->kobj); } void kgsl_process_init_sysfs(struct kgsl_process_private *private) { unsigned char name[16]; + int i, ret; - /* Add a entry to the sysfs device */ snprintf(name, sizeof(name), "%d", private->pid); - private->kobj = kobject_create_and_add(name, kgsl_driver.prockobj); - /* sysfs failure isn't fatal, just annoying */ - if (private->kobj != NULL) { - if (sysfs_create_group(private->kobj, &process_attr_group)) { - kobject_put(private->kobj); - private->kobj = NULL; - } + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) + return; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + /* We need to check the value of sysfs_create_file, but we + * don't really care if it passed or not */ + + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr); + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr); } } @@ -135,6 +214,10 @@ static int kgsl_drv_memstat_show(struct device *dev, val = kgsl_driver.stats.vmalloc; else if (!strncmp(attr->attr.name, "vmalloc_max", 11)) val = kgsl_driver.stats.vmalloc_max; + else if (!strncmp(attr->attr.name, "page_alloc", 10)) + val = kgsl_driver.stats.page_alloc; + else if (!strncmp(attr->attr.name, "page_alloc_max", 14)) + val = kgsl_driver.stats.page_alloc_max; else if (!strncmp(attr->attr.name, "coherent", 8)) val = kgsl_driver.stats.coherent; else if (!strncmp(attr->attr.name, "coherent_max", 12)) @@ -164,6 +247,8 @@ static int kgsl_drv_histogram_show(struct device *dev, DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); @@ -173,6 +258,8 @@ DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); static struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, &dev_attr_coherent, &dev_attr_coherent_max, &dev_attr_mapped, @@ -216,7 +303,7 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) int i; for_each_sg(sg, s, sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); _outer_cache_range_op(op, paddr, s->length); } } @@ -227,17 +314,18 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) } #endif -static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) memdesc->hostptr + 
offset; - page = vmalloc_to_page((void *) pg); + i = offset >> PAGE_SHIFT; + page = sg_page(&memdesc->sg[i]); if (page == NULL) return VM_FAULT_SIGBUS; @@ -247,15 +335,30 @@ static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, return 0; } -static int kgsl_vmalloc_vmflags(struct kgsl_memdesc *memdesc) +static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc) { return VM_RESERVED | VM_DONTEXPAND; } -static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) { + int i = 0; + struct scatterlist *sg; + int sglen = memdesc->sglen; + + /* Don't free the guard page if it was used */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + kgsl_driver.stats.page_alloc -= memdesc->size; + + if (memdesc->hostptr) { + vunmap(memdesc->hostptr); kgsl_driver.stats.vmalloc -= memdesc->size; - vfree(memdesc->hostptr); + } + if (memdesc->sg) + for_each_sg(memdesc->sg, sg, sglen, i) + __free_page(sg_page(sg)); } static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) @@ -263,6 +366,48 @@ static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; } +/* + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int sglen = memdesc->sglen; + int i; + + /* Don't map the guard page if it exists */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + /* create a list of pages to call vmap */ + pages = vmalloc(sglen * sizeof(struct page *)); + if (!pages) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct page *)); + return -ENOMEM; + } + for_each_sg(memdesc->sg, sg, sglen, i) + pages[i] = sg_page(sg); + memdesc->hostptr = vmap(pages, sglen, + VM_IOREMAP, page_prot); + KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, + kgsl_driver.stats.vmalloc_max); + vfree(pages); + } + if (!memdesc->hostptr) + return -ENOMEM; + + return 0; +} + static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -302,12 +447,13 @@ static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) } /* Global - also used by kgsl_drm.c */ -struct kgsl_memdesc_ops kgsl_vmalloc_ops = { - .free = kgsl_vmalloc_free, - .vmflags = kgsl_vmalloc_vmflags, - .vmfault = kgsl_vmalloc_vmfault, +struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = kgsl_page_alloc_vmflags, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel_mem = kgsl_page_alloc_map_kernel, }; -EXPORT_SYMBOL(kgsl_vmalloc_ops); +EXPORT_SYMBOL(kgsl_page_alloc_ops); static struct kgsl_memdesc_ops kgsl_ebimem_ops = { .free = kgsl_ebimem_free, @@ -341,47 +487,145 @@ void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) EXPORT_SYMBOL(kgsl_cache_range_op); static int -_kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, - void *ptr, size_t size, unsigned int protflags) + size_t size, unsigned int protflags) { - int order, ret = 0; + int i, order, ret = 0; int sglen = PAGE_ALIGN(size) / PAGE_SIZE; - int i; + struct page **pages = NULL; + pgprot_t 
page_prot = pgprot_writecombine(PAGE_KERNEL); + void *ptr; + + /* + * Add guard page to the end of the allocation when the + * IOMMU is in use. + */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + sglen++; memdesc->size = size; memdesc->pagetable = pagetable; memdesc->priv = KGSL_MEMFLAGS_CACHED; - memdesc->ops = &kgsl_vmalloc_ops; - memdesc->hostptr = (void *) ptr; + memdesc->ops = &kgsl_page_alloc_ops; + + memdesc->sg = kgsl_sg_alloc(sglen); - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); if (memdesc->sg == NULL) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct scatterlist)); ret = -ENOMEM; goto done; } + /* + * Allocate space to store the list of pages to send to vmap. + * This is an array of pointers so we can track 1024 pages per page of + * allocation which means we can handle up to an 8MB buffer request with + * two pages; well within the acceptable limits for using kmalloc. + */ + + pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL); + + if (pages == NULL) { + KGSL_CORE_ERR("kmalloc (%d) failed\n", + sglen * sizeof(struct page *)); + ret = -ENOMEM; + goto done; + } + + kmemleak_not_leak(memdesc->sg); + memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); - for (i = 0; i < memdesc->sglen; i++, ptr += PAGE_SIZE) { - struct page *page = vmalloc_to_page(ptr); - if (!page) { - ret = -EINVAL; + for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) { + + /* + * Don't use GFP_ZERO here because it is faster to memset the + * range ourselves (see below) + */ + + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (pages[i] == NULL) { + ret = -ENOMEM; + memdesc->sglen = i; goto done; } - sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); + + sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0); } - kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); + /* Add the guard page to the end of the sglist */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { + /* + * It doesn't matter if we use GFP_ZERO here, this never + * gets mapped, and we only allocate it once in the life + * of the system + */ + + if (kgsl_guard_page == NULL) + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + + if (kgsl_guard_page != NULL) { + sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page, + PAGE_SIZE, 0); + memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE; + } else + memdesc->sglen--; + } + + /* + * All memory that goes to the user has to be zeroed out before it gets + * exposed to userspace. This means that the memory has to be mapped in + * the kernel, zeroed (memset) and then unmapped. This also means that + * the dcache has to be flushed to ensure coherency between the kernel + * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped + * zeroed and unmapped each individual page, and then we had to turn + * around and call flush_dcache_page() on that page to clear the caches. + * This was killing us for performance. Instead, we found it is much + * faster to allocate the pages without GFP_ZERO, map the entire range, + * memset it, flush the range and then unmap - this results in a factor + * of 4 improvement for speed for large buffers. There is a small + * increase in speed for small buffers, but only on the order of a few + * microseconds at best. The only downside is that there needs to be + * enough temporary space in vmalloc to accommodate the map.
This + * shouldn't be a problem, but if it happens, fall back to a much slower + * path + */ + + ptr = vmap(pages, i, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, memdesc->size); + dmac_flush_range(ptr, ptr + memdesc->size); + vunmap(ptr); + } else { + int j; + + /* Very, very, very slow path */ + + for (j = 0; j < i; j++) { + ptr = kmap_atomic(pages[j],KM_BOUNCE_READ); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr,KM_BOUNCE_READ); + } + } + + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, + KGSL_CACHE_OP_FLUSH); ret = kgsl_mmu_map(pagetable, memdesc, protflags); if (ret) goto done; - KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, - kgsl_driver.stats.vmalloc_max); + KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc, + kgsl_driver.stats.page_alloc_max); order = get_order(size); @@ -389,6 +633,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, kgsl_driver.stats.histogram[order]++; done: + kfree(pages); + if (ret) kgsl_sharedmem_free(memdesc); @@ -396,51 +642,41 @@ done: } int -kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { - void *ptr; - + int ret = 0; BUG_ON(size == 0); size = ALIGN(size, PAGE_SIZE * 2); - ptr = vmalloc(size); - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc(%d) failed\n", size); - return -ENOMEM; - } - - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + if (!ret) + ret = kgsl_page_alloc_map_kernel(memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + return ret; } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc); int -kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags) { - void *ptr; unsigned int protflags; BUG_ON(size == 0); - ptr = vmalloc_user(size); - - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc_user(%d) failed: allocated=%d\n", - size, kgsl_driver.stats.vmalloc); - return -ENOMEM; - } protflags = GSL_PT_PAGE_RV; if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, protflags); } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size) @@ -488,7 +724,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, memdesc->sglen); memset(memdesc, 0, sizeof(*memdesc)); } @@ -570,13 +806,17 @@ kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, uint32_t *dst, unsigned int offsetbytes) { + uint32_t *src; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); - WARN_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - if (offsetbytes + sizeof(unsigned int) > memdesc->size) + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) return -ERANGE; - - *dst = readl_relaxed(memdesc->hostptr + offsetbytes); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; return 
0; } EXPORT_SYMBOL(kgsl_sharedmem_readl); @@ -586,12 +826,19 @@ kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, uint32_t src) { + uint32_t *dst; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); - BUG_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, - src, sizeof(uint)); - writel_relaxed(src, memdesc->hostptr + offsetbytes); + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, + src, sizeof(uint32_t)); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; return 0; } EXPORT_SYMBOL(kgsl_sharedmem_writel); @@ -603,9 +850,39 @@ kgsl_sharedmem_set(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); BUG_ON(offsetbytes + sizebytes > memdesc->size); - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, value, + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, value, sizebytes); memset(memdesc->hostptr + offsetbytes, value, sizebytes); return 0; } EXPORT_SYMBOL(kgsl_sharedmem_set); + +/* + * kgsl_sharedmem_map_vma - Map a user vma to physical memory + * + * @vma - The user vma to map + * @memdesc - The memory descriptor which contains information about the + * physical memory + * + * Return: 0 on success else error code + */ +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc) +{ + unsigned long addr = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + int ret, i = 0; + + if (!memdesc->sg || (size != memdesc->size) || + (memdesc->sglen != (size / PAGE_SIZE))) + return -EINVAL; + + for (; addr < vma->vm_end; addr += PAGE_SIZE, i++) { + ret = vm_insert_page(vma, addr, sg_page(&memdesc->sg[i])); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_map_vma); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h old mode 100644 new mode 100755 index 61bcf05b..20579ac9 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -16,6 +16,8 @@ #include #include +#include +#include /* * Convert a page to a physical address @@ -31,20 +33,15 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 +/** Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMFLAGS_GLOBAL 0x00000002 -struct kgsl_memdesc_ops { - int (*vmflags)(struct kgsl_memdesc *); - int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, - struct vm_fault *); - void (*free)(struct kgsl_memdesc *memdesc); -}; +extern struct kgsl_memdesc_ops kgsl_page_alloc_ops; -extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; - -int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size); -int kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags); @@ -80,6 +77,68 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +static inline unsigned int kgsl_get_sg_pa(struct scatterlist *sg) +{ + /* + * Try 
sg_dma_address first to support ion carveout + * regions which do not work with sg_phys(). + */ + unsigned int pa = sg_dma_address(sg); + if (pa == 0) + pa = sg_phys(sg); + return pa; +} + +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc); + +/* + * For relatively small sglists, it is preferable to use kzalloc + * rather than going down the vmalloc rat hole. If the size of + * the sglist is < PAGE_SIZE use kzalloc otherwise fallback to + * vmalloc + */ + +static inline void *kgsl_sg_alloc(unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + else + return vmalloc(sglen * sizeof(struct scatterlist)); +} + +static inline void kgsl_sg_free(void *ptr, unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); +} + +static inline int +memdesc_sg_phys(struct kgsl_memdesc *memdesc, + unsigned int physaddr, unsigned int size) +{ + memdesc->sg = kgsl_sg_alloc(1); + + kmemleak_not_leak(memdesc->sg); + + memdesc->sglen = 1; + sg_init_table(memdesc->sg, 1); + memdesc->sg[0].length = size; + memdesc->sg[0].offset = 0; + memdesc->sg[0].dma_address = physaddr; + return 0; +} + +static inline int +kgsl_allocate(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, size_t size) +{ + return kgsl_sharedmem_page_alloc(memdesc, pagetable, size); +} + static inline int memdesc_sg_phys(struct kgsl_memdesc *memdesc, unsigned int physaddr, unsigned int size) @@ -112,21 +171,13 @@ kgsl_allocate_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int flags) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc_user(memdesc, pagetable, size, flags); -#else - return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size, flags); -#endif + return kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size, flags); } static inline int kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) { int ret = kgsl_sharedmem_alloc_coherent(memdesc, size); -#ifndef CONFIG_MSM_KGSL_MMU - if (!ret) - memdesc->gpuaddr = memdesc->physaddr; -#endif return ret; } diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c old mode 100644 new mode 100755 index e7a1d521..688f23d8 --- a/drivers/gpu/msm/z180.c +++ b/drivers/gpu/msm/z180.c @@ -100,6 +100,7 @@ enum z180_cmdwindow_type { static int z180_start(struct kgsl_device *device, unsigned int init_ram); static int z180_stop(struct kgsl_device *device); static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs); static void z180_regread(struct kgsl_device *device, @@ -382,8 +383,8 @@ static int z180_idle(struct kgsl_device *device, unsigned int timeout) if (timestamp_cmp(z180_dev->current_timestamp, z180_dev->timestamp) > 0) - status = z180_wait(device, z180_dev->current_timestamp, - timeout); + status = z180_wait(device, NULL, + z180_dev->current_timestamp, timeout); if (status) KGSL_DRV_ERR(device, "z180_waittimestamp() timed out\n"); @@ -793,14 +794,16 @@ static void z180_cmdwindow_write(struct kgsl_device *device, } static unsigned int z180_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { struct z180_device *z180_dev = Z180_DEVICE(device); + (void)context; /* get current EOP timestamp */ return z180_dev->timestamp; } static int z180_waittimestamp(struct kgsl_device *device, + 
struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -811,13 +814,14 @@ static int z180_waittimestamp(struct kgsl_device *device, msecs = 10 * MSEC_PER_SEC; mutex_unlock(&device->mutex); - status = z180_wait(device, timestamp, msecs); + status = z180_wait(device, context, timestamp, msecs); mutex_lock(&device->mutex); return status; } static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -826,7 +830,7 @@ static int z180_wait(struct kgsl_device *device, timeout = wait_io_event_interruptible_timeout( device->wait_queue, - kgsl_check_timestamp(device, timestamp), + kgsl_check_timestamp(device, context, timestamp), msecs_to_jiffies(msecs)); if (timeout > 0) diff --git a/drivers/misc/pmem.c b/drivers/misc/pmem.c index f1523fd5..bc083e17 100755 --- a/drivers/misc/pmem.c +++ b/drivers/misc/pmem.c @@ -1,7 +1,7 @@ /* drivers/android/pmem.c * * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -1074,17 +1074,17 @@ static void bitmap_bits_set_all(uint32_t *bitp, int bit_start, int bit_end) static int bitmap_allocate_contiguous(uint32_t *bitp, int num_bits_to_alloc, - int total_bits, int spacing) + int total_bits, int spacing, int start_bit) { int bit_start, last_bit, word_index; if (num_bits_to_alloc <= 0) return -1; - for (bit_start = 0; ; - bit_start = (last_bit + + for (bit_start = start_bit; ; + bit_start = ((last_bit + (word_index << PMEM_32BIT_WORD_ORDER) + spacing - 1) - & ~(spacing - 1)) { + & ~(spacing - 1)) + start_bit) { int bit_end = bit_start + num_bits_to_alloc, total_words; if (bit_end > total_bits) @@ -1162,7 +1162,8 @@ static int reserve_quanta(const unsigned int quanta_needed, ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap, quanta_needed, (pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum, - spacing); + spacing, + start_bit); #if PMEM_DEBUG if (ret < 0) @@ -1915,6 +1916,13 @@ int pmem_cache_maint(struct file *file, unsigned int cmd, if (!file) return -EBADF; + /* + * check that the vaddr passed for flushing is valid + * so that you don't crash the kernel + */ + if (!pmem_addr->vaddr) + return -EINVAL; + data = file->private_data; id = get_id(file); diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c old mode 100644 new mode 100755 index 806dcad0..4fef37f7 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -73,7 +73,7 @@ static int msmsdcc_auto_suspend(struct mmc_host *, int); #define BUSCLK_TIMEOUT (HZ) #define SQN_BUSCLK_TIMEOUT (5 * HZ) static unsigned int msmsdcc_fmin = 144000; -static unsigned int msmsdcc_fmax = 50000000; +static unsigned int msmsdcc_fmax = 64000000; static unsigned int msmsdcc_4bit = 1; static unsigned int msmsdcc_pwrsave = 1; static unsigned int msmsdcc_piopoll = 1; @@ -308,42 +308,40 @@ msmsdcc_dma_exec_func(struct msm_dmov_cmd *cmd) } static void -msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, - unsigned int result, - struct msm_dmov_errdata *err) +msmsdcc_dma_complete_tlet(unsigned long data) { - struct msmsdcc_dma_data *dma_data = - container_of(cmd, struct msmsdcc_dma_data, hdr); - struct msmsdcc_host *host = dma_data->host; + struct msmsdcc_host *host = (struct msmsdcc_host *)data; unsigned long flags; struct mmc_request 
*mrq; + struct msm_dmov_errdata err; spin_lock_irqsave(&host->lock, flags); host->dma.active = 0; + err = host->dma.err; mrq = host->curr.mrq; BUG_ON(!mrq); WARN_ON(!mrq->data); - if (!(result & DMOV_RSLT_VALID)) { + if (!(host->dma.result & DMOV_RSLT_VALID)) { pr_err("msmsdcc: Invalid DataMover result\n"); goto out; } - if (result & DMOV_RSLT_DONE) { + if (host->dma.result & DMOV_RSLT_DONE) { host->curr.data_xfered = host->curr.xfer_size; } else { /* Error or flush */ - if (result & DMOV_RSLT_ERROR) + if (host->dma.result & DMOV_RSLT_ERROR) pr_err("%s: DMA error (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (result & DMOV_RSLT_FLUSH) + mmc_hostname(host->mmc), host->dma.result); + if (host->dma.result & DMOV_RSLT_FLUSH) pr_err("%s: DMA channel flushed (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (err) + mmc_hostname(host->mmc), host->dma.result); + pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n", - err->flush[0], err->flush[1], err->flush[2], - err->flush[3], err->flush[4], err->flush[5]); + err.flush[0], err.flush[1], err.flush[2], + err.flush[3], err.flush[4], err.flush[5]); if (!mrq->data->error) mrq->data->error = -EIO; } @@ -391,6 +389,22 @@ out: return; } +static void +msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, + unsigned int result, + struct msm_dmov_errdata *err) +{ + struct msmsdcc_dma_data *dma_data = + container_of(cmd, struct msmsdcc_dma_data, hdr); + struct msmsdcc_host *host = dma_data->host; + + dma_data->result = result; + if (err) + memcpy(&dma_data->err, err, sizeof(struct msm_dmov_errdata)); + + tasklet_schedule(&host->dma_tlet); +} + static int validate_dma(struct msmsdcc_host *host, struct mmc_data *data) { if (host->dma.channel == -1) @@ -451,14 +465,30 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) host->curr.user_pages = 0; box = &nc->cmd[0]; - for (i = 0; i < host->dma.num_ents; i++) { + + /* location of command block must be 64 bit aligned */ + BUG_ON(host->dma.cmd_busaddr & 0x07); + + nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; + host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | + DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); + host->dma.hdr.complete_func = msmsdcc_dma_complete_func; + + n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, + host->dma.num_ents, host->dma.dir); + if (n == 0) { + printk(KERN_ERR "%s: Unable to map in all sg elements\n", + mmc_hostname(host->mmc)); + host->dma.sg = NULL; + host->dma.num_ents = 0; + return -ENOMEM; + } + + for_each_sg(host->dma.sg, sg, n, i) { + box->cmd = CMD_MODE_BOX; - /* Initialize sg dma address */ - sg->dma_address = page_to_dma(mmc_dev(host->mmc), sg_page(sg)) - + sg->offset; - - if (i == (host->dma.num_ents - 1)) + if (i == n - 1) box->cmd |= CMD_LC; rows = (sg_dma_len(sg) % MCI_FIFOSIZE) ? 
(sg_dma_len(sg) / MCI_FIFOSIZE) + 1 : @@ -486,27 +516,6 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) box->cmd |= CMD_DST_CRCI(crci); } box++; - sg++; - } - - /* location of command block must be 64 bit aligned */ - BUG_ON(host->dma.cmd_busaddr & 0x07); - - nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; - host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | - DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); - host->dma.hdr.complete_func = msmsdcc_dma_complete_func; - - n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, - host->dma.num_ents, host->dma.dir); -/* dsb inside dma_map_sg will write nc out to mem as well */ - - if (n != host->dma.num_ents) { - printk(KERN_ERR "%s: Unable to map in all sg elements\n", - mmc_hostname(host->mmc)); - host->dma.sg = NULL; - host->dma.num_ents = 0; - return -ENOMEM; } return 0; @@ -542,6 +551,11 @@ msmsdcc_start_command_deferred(struct msmsdcc_host *host, (cmd->opcode == 53)) *c |= MCI_CSPM_DATCMD; + if (host->prog_scan && (cmd->opcode == 12)) { + *c |= MCI_CPSM_PROGENA; + host->prog_enable = true; + } + if (cmd == cmd->mrq->stop) *c |= MCI_CSPM_MCIABORT; @@ -612,6 +626,8 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data, } dsb(); msm_dmov_enqueue_cmd_ext(host->dma.channel, &host->dma.hdr); + if (data->flags & MMC_DATA_WRITE) + host->prog_scan = true; } else { msmsdcc_writel(host, timeout, MMCIDATATIMER); @@ -701,6 +717,9 @@ msmsdcc_pio_read(struct msmsdcc_host *host, char *buffer, unsigned int remain) count += remain; }else #endif + if (remain % 4) + remain = ((remain >> 2) + 1) << 2; + while (msmsdcc_readl(host, MMCISTATUS) & MCI_RXDATAAVLBL) { *ptr = msmsdcc_readl(host, MMCIFIFO + (count % MCI_FIFOSIZE)); ptr++; @@ -737,13 +756,14 @@ msmsdcc_pio_write(struct msmsdcc_host *host, char *buffer, } else { #endif do { - unsigned int count, maxcnt; + unsigned int count, maxcnt, sz; maxcnt = status & MCI_TXFIFOEMPTY ? MCI_FIFOSIZE : MCI_FIFOHALFSIZE; count = min(remain, maxcnt); - writesl(base + MMCIFIFO, ptr, count >> 2); + sz = count % 4 ? 
(count >> 2) + 1 : (count >> 2); + writesl(base + MMCIFIFO, ptr, sz); ptr += count; remain -= count; @@ -906,8 +926,23 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status) else if (host->curr.data) { /* Non DMA */ msmsdcc_stop_data(host); msmsdcc_request_end(host, cmd->mrq); - } else /* host->data == NULL */ + } else { /* host->data == NULL */ + if (!cmd->error && host->prog_enable) { + if (status & MCI_PROGDONE) { + host->prog_scan = false; + host->prog_enable = false; msmsdcc_request_end(host, cmd->mrq); + } else { + host->curr.cmd = cmd; + } + } else { + if (host->prog_enable) { + host->prog_scan = false; + host->prog_enable = false; + } + msmsdcc_request_end(host, cmd->mrq); + } + } } else if (cmd->data) if (!(cmd->data->flags & MMC_DATA_READ)) msmsdcc_start_data(host, cmd->data, @@ -921,7 +956,7 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status, struct mmc_data *data = host->curr.data; if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL | - MCI_CMDTIMEOUT) && host->curr.cmd) { + MCI_CMDTIMEOUT | MCI_PROGDONE) && host->curr.cmd) { msmsdcc_do_cmdirq(host, status); } @@ -1265,24 +1300,6 @@ msmsdcc_init_dma(struct msmsdcc_host *host) return 0; } -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ -static void -do_resume_work(struct work_struct *work) -{ - struct msmsdcc_host *host = - container_of(work, struct msmsdcc_host, resume_task); - struct mmc_host *mmc = host->mmc; - - if (mmc) { - mmc_resume_host(mmc); - if (host->stat_irq) - enable_irq(host->stat_irq); - } -} - -#endif - - #ifdef CONFIG_HAS_EARLYSUSPEND static void msmsdcc_early_suspend(struct early_suspend *h) { @@ -1382,14 +1399,8 @@ msmsdcc_probe(struct platform_device *pdev) host->dmares = dmares; spin_lock_init(&host->lock); -#ifdef CONFIG_MMC_EMBEDDED_SDIO - if (plat->embedded_sdio) - mmc_set_embedded_sdio_data(mmc, - &plat->embedded_sdio->cis, - &plat->embedded_sdio->cccr, - plat->embedded_sdio->funcs, - plat->embedded_sdio->num_funcs); -#endif + tasklet_init(&host->dma_tlet, msmsdcc_dma_complete_tlet, + (unsigned long)host); /* * Setup DMA @@ -1608,22 +1619,14 @@ msmsdcc_resume(struct platform_device *dev) msmsdcc_writel(host, host->saved_irq0mask, MMCIMASK0); - if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) { -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - schedule_work(&host->resume_task); -#else + if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) mmc_resume_host(mmc); -#endif - } - if (host->stat_irq) enable_irq(host->stat_irq); - #if BUSCLK_PWRSAVE if (host->clks_on) msmsdcc_disable_clocks(host, 1); #endif - } return 0; } diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h old mode 100644 new mode 100755 index fdb0b9c6..1368fc0c --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -155,7 +155,7 @@ #define MCI_IRQENABLE \ (MCI_CMDCRCFAILMASK|MCI_DATACRCFAILMASK|MCI_CMDTIMEOUTMASK| \ MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK| \ - MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK) + MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK|MCI_PROGDONEMASK) /* * The size of the FIFO in bytes. 
@@ -164,7 +164,7 @@ #define MCI_FIFOHALFSIZE (MCI_FIFOSIZE / 2) -#define NR_SG 32 +#define NR_SG 128 struct clk; @@ -190,7 +190,7 @@ struct msmsdcc_dma_data { int busy; /* Set if DM is busy */ int active; unsigned int result; - struct msm_dmov_errdata *err; + struct msm_dmov_errdata err; }; struct msmsdcc_pio_data { @@ -258,17 +258,12 @@ struct msmsdcc_host { int polling_enabled; #endif -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - struct work_struct resume_task; -#endif struct tasklet_struct dma_tlet; #ifdef CONFIG_MMC_AUTO_SUSPEND unsigned long suspended; #endif - unsigned int prog_scan; - unsigned int prog_enable; /* Command parameters */ unsigned int cmd_timeout; unsigned int cmd_pio_irqmask; @@ -279,6 +274,8 @@ struct msmsdcc_host { unsigned int dummy_52_needed; unsigned int dummy_52_state; + bool prog_scan; + bool prog_enable; }; #endif diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 9b23d680..12a9b390 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -51,7 +51,7 @@ unsigned crci_mask; #include "msm_nand.h" -#define MSM_NAND_DMA_BUFFER_SIZE SZ_4K +#define MSM_NAND_DMA_BUFFER_SIZE SZ_1M #define MSM_NAND_DMA_BUFFER_SLOTS \ (MSM_NAND_DMA_BUFFER_SIZE / (sizeof(((atomic_t *)0)->counter) * 8)) diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c index 5f071a39..19e97980 100644 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -46,6 +46,8 @@ Original Auther: /*#include "../w1/slaves/w1_ds2784.h"*/ #include #include +#include +#include struct ds2746_device_info { @@ -64,6 +66,45 @@ struct ds2746_device_info { }; static struct wake_lock vbus_wake_lock; +/* + * proc_fs interface for fast charge + * by marc1706 + */ +#define PROC_FAST_CHARGE_NAME "fast_charge" + +static struct proc_dir_entry *fast_charge; +static int allow_fast_charge = 1; + +static int proc_read_fast_charge(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int ret; + + ret = sprintf(page, "%i\n", allow_fast_charge); + + return ret; +} + +static int proc_write_fast_charge(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char temp_buff[count + 1]; + int ret; + int len = count; + + if (copy_from_user(temp_buff, buffer, len)) + return -EFAULT; + + sscanf(temp_buff, "%i", &ret); + + if (!ret || ret == 1) + allow_fast_charge = ret; + else + printk(KERN_ALERT "%s: Incorrect value:%i\n", __func__, ret); + + return ret; +} + /*======================================================================================== HTC power algorithm helper member and functions @@ -279,10 +320,18 @@ static BOOL is_charging_avaiable(void) static BOOL is_high_current_charging_avaialable(void) { - if (!poweralg.protect_flags.is_charging_high_current_avaialble) return FALSE; - //if (!poweralg.is_china_ac_in) return FALSE; /* allow high current charging on china chargers */ - if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) return FALSE; - return TRUE; + bool ret; + + if (!poweralg.protect_flags.is_charging_high_current_avaialble) + ret = FALSE; + else if (!poweralg.is_china_ac_in && !allow_fast_charge) + ret = FALSE; + else if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) + ret = FALSE; + else + ret = TRUE; + + return ret; } static void update_next_charge_state(void) @@ -1245,6 +1294,19 @@ static int __init ds2746_battery_init(void) return ret; } + fast_charge = create_proc_entry(PROC_FAST_CHARGE_NAME, 0644, NULL); + + if (fast_charge == NULL) { + 
remove_proc_entry(PROC_FAST_CHARGE_NAME, NULL); + printk(KERN_ALERT "%s: Unable to create /proc/%s\n", __func__, + PROC_FAST_CHARGE_NAME); + } + fast_charge->read_proc = proc_read_fast_charge; + fast_charge->write_proc = proc_write_fast_charge; + fast_charge->uid = 0; + fast_charge->gid = 0; + printk(KERN_INFO "/proc/%s created\n", PROC_FAST_CHARGE_NAME); + /*mutex_init(&htc_batt_info.lock);*/ return platform_driver_register(&ds2746_battery_driver); } diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig old mode 100644 new mode 100755 index 8ee4bfa6..b8964347 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,5 +125,7 @@ source "drivers/staging/iio/Kconfig" source "drivers/staging/zram/Kconfig" +source "drivers/staging/snappy/Kconfig" + endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile old mode 100644 new mode 100755 index 5a1b7341..621dc916 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -45,4 +45,5 @@ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_XVMALLOC) += zram/ - +obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/ +obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/ diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index a0763da9..7453f1b0 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -31,14 +31,15 @@ #include #include #include -#include #include #include +#include #include "binder.h" static DEFINE_MUTEX(binder_lock); static DEFINE_MUTEX(binder_deferred_lock); +static DEFINE_MUTEX(binder_mmap_lock); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); @@ -103,7 +104,7 @@ enum { static uint32_t binder_debug_mask; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static int binder_debug_no_lock; +static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -258,7 +259,7 @@ struct binder_ref { }; struct binder_buffer { - struct list_head entry; /* free and allocated entries by addesss */ + struct list_head entry; /* free and allocated entries by address */ struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */ unsigned free:1; @@ -288,6 +289,7 @@ struct binder_proc { struct rb_root refs_by_node; int pid; struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; @@ -380,8 +382,7 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); + fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -633,6 +634,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; + if (vma && mm != proc->vma_vm_mm) { + pr_err("binder: %d: vma mm and task mm mismatch\n", + proc->pid); + vma = NULL; + } } if (allocate == 0) @@ -651,7 +657,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; BUG_ON(*page); - *page = alloc_page(GFP_KERNEL | __GFP_ZERO); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (*page == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: %d: binder_alloc_buf failed " @@ -1283,7 +1289,8 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: reply failed, target " "thread, %d:%d, has error code %d " - "already\n", target_thread->proc->pid, + "already\n", + target_thread->proc->pid, target_thread->pid, target_thread->return_error); } @@ -1338,9 +1345,9 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, buffer->data_size < sizeof(*fp) || !IS_ALIGNED(*offp, sizeof(void *))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad" - "offset %zd, size %zd\n", debug_id, - *offp, buffer->data_size); + "binder: transaction release %d bad" + "offset %zd, size %zd\n", debug_id, + *offp, buffer->data_size); continue; } fp = (struct flat_binder_object *)(buffer->data + *offp); @@ -1352,7 +1359,9 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (node == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: transaction release %d" - " bad node %p\n", debug_id, fp->binder); + " bad node %p\n", + debug_id, + fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, @@ -2272,6 +2281,7 @@ retry: if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error2); if (ptr == end) goto done; thread->return_error2 = BR_OK; @@ -2279,6 +2289,7 @@ retry: if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error); thread->return_error = BR_OK; goto done; } @@ -2434,6 +2445,7 @@ retry: if (put_user(death->cookie, (void * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); + binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "binder: %d:%d %s %p\n", proc->pid, thread->pid, @@ -2541,6 +2553,7 @@ done: proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } return 0; } @@ -2556,14 +2569,38 @@ static void binder_release_work(struct list_head *list) struct binder_transaction *t; t = container_of(w, struct binder_transaction, work); - if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) + if (t->buffer->target_node && + !(t->flags & TF_ONE_WAY)) { binder_send_failed_reply(t, BR_DEAD_REPLY); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered transaction %d\n", + t->debug_id); + t->buffer->transaction = NULL; + kfree(t); + binder_stats_deleted(BINDER_STAT_TRANSACTION); + } } break; case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered TRANSACTION_COMPLETE\n"); kfree(w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; + case 
BINDER_WORK_DEAD_BINDER_AND_CLEAR: + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { + struct binder_ref_death *death; + + death = container_of(w, struct binder_ref_death, work); + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered death notification, %p\n", + death->cookie); + kfree(death); + binder_stats_deleted(BINDER_STAT_DEATH); + } break; default: + pr_err("binder: unexpected work type, %d, not freed\n", + w->type); break; } } @@ -2684,8 +2721,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_ioctl: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg);*/ + /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); @@ -2755,6 +2791,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (binder_context_mgr_node != NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: BINDER_SET_CONTEXT_MGR already set\n"); + "binder: BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto err; } @@ -2808,8 +2845,8 @@ err: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d ioctl %x %lx returned %d\n", - proc->pid, current->pid, cmd, arg, ret); + "binder: %d:%d ioctl %x %lx returned %d\n", + proc->pid, current->pid, cmd, arg, ret); return ret; } @@ -2821,7 +2858,6 @@ static void binder_vma_open(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - dump_stack(); } static void binder_vma_close(struct vm_area_struct *vma) @@ -2833,6 +2869,7 @@ static void binder_vma_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); proc->vma = NULL; + proc->vma_vm_mm = NULL; binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -2865,6 +2902,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + mutex_lock(&binder_mmap_lock); if (proc->buffer) { ret = -EBUSY; failure_string = "already mapped"; @@ -2879,6 +2917,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } proc->buffer = area->addr; proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; + mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { @@ -2913,8 +2952,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) binder_insert_free_buffer(proc, buffer); proc->free_async_space = proc->buffer_size / 2; barrier(); - proc->files = get_files_struct(current); + proc->files = get_files_struct(proc->tsk); proc->vma = vma; + proc->vma_vm_mm = vma->vm_mm; /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_mmap: %d %lx-%lx maps %p\n", @@ -2925,14 +2965,17 @@ err_alloc_small_buf_failed: kfree(proc->pages); proc->pages = NULL; err_alloc_pages_failed: + mutex_lock(&binder_mmap_lock); vfree(proc->buffer); proc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: + mutex_unlock(&binder_mmap_lock); err_bad_arg: binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_mmap: %d %lx-%lx %s failed %d\n", - proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + 
proc->pid, vma->vm_start, vma->vm_end, failure_string, + ret); return ret; } @@ -3039,6 +3082,7 @@ static void binder_deferred_release(struct binder_proc *proc) nodes++; rb_erase(&node->rb_node, &proc->nodes); list_del_init(&node->work.entry); + binder_release_work(&node->async_todo); if (hlist_empty(&node->refs)) { kfree(node); binder_stats_deleted(BINDER_STAT_NODE); @@ -3077,6 +3121,7 @@ static void binder_deferred_release(struct binder_proc *proc) binder_delete_ref(ref); } binder_release_work(&proc->todo); + binder_release_work(&proc->delivered_death); buffers = 0; while ((n = rb_first(&proc->allocated_buffers))) { diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c old mode 100644 new mode 100755 index 1a0c1391..002e11ae --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -37,7 +37,7 @@ * mutex 'mutex'. */ struct logger_log { - unsigned char *buffer;/* the ring buffer itself */ + unsigned char *buffer;/* the ring buffer itself */ struct miscdevice misc; /* misc device representing the log */ wait_queue_head_t wq; /* wait queue for readers */ struct list_head readers; /* this log's readers */ @@ -57,19 +57,25 @@ struct logger_reader { struct logger_log *log; /* associated log */ struct list_head list; /* entry in logger_log's list */ size_t r_off; /* current read head offset */ + bool r_all; /* reader can read all entries */ + int r_ver; /* reader ABI version */ }; /* logger_offset - returns index 'n' into the log via (optimized) modulus */ -#define logger_offset(n) ((n) & (log->size - 1)) +size_t logger_offset(struct logger_log *log, size_t n) +{ + return n & (log->size-1); +} + /* * file_get_log - Given a file structure, return the associated log * * This isn't aesthetic. We have several goals: * - * 1) Need to quickly obtain the associated log during an I/O operation - * 2) Readers need to maintain state (logger_reader) - * 3) Writers need to be very fast (open() should be a near no-op) + * 1) Need to quickly obtain the associated log during an I/O operation + * 2) Readers need to maintain state (logger_reader) + * 3) Writers need to be very fast (open() should be a near no-op) * * In the reader case, we can trivially go file->logger_reader->logger_log. * For a writer, we don't want to maintain a logger_reader, so we just go @@ -86,25 +92,75 @@ static inline struct logger_log *file_get_log(struct file *file) } /* - * get_entry_len - Grabs the length of the payload of the next entry starting - * from 'off'. + * get_entry_header - returns a pointer to the logger_entry header within + * 'log' starting at offset 'off'. A temporary logger_entry 'scratch' must + * be provided. Typically the return value will be a pointer within + * 'logger->buf'. However, a pointer to 'scratch' may be returned if + * the log entry spans the end and beginning of the circular buffer. + */ +static struct logger_entry *get_entry_header(struct logger_log *log, + size_t off, struct logger_entry *scratch) +{ + size_t len = min(sizeof(struct logger_entry), log->size - off); + if (len != sizeof(struct logger_entry)) { + memcpy(((void *) scratch), log->buffer + off, len); + memcpy(((void *) scratch) + len, log->buffer, + sizeof(struct logger_entry) - len); + return scratch; + } + + return (struct logger_entry *) (log->buffer + off); +} + +/* + * get_entry_msg_len - Grabs the length of the message of the entry + * starting from from 'off'. + * + * An entry length is 2 bytes (16 bits) in host endian order. 
+ * In the log, the length does not include the size of the log entry structure. + * This function returns the size including the log entry structure. * * Caller needs to hold log->mutex. */ -static __u32 get_entry_len(struct logger_log *log, size_t off) +static __u32 get_entry_msg_len(struct logger_log *log, size_t off) { - __u16 val; + struct logger_entry scratch; + struct logger_entry *entry; - switch (log->size - off) { - case 1: - memcpy(&val, log->buffer + off, 1); - memcpy(((char *) &val) + 1, log->buffer, 1); - break; - default: - memcpy(&val, log->buffer + off, 2); + entry = get_entry_header(log, off, &scratch); + return entry->len; +} + +static size_t get_user_hdr_len(int ver) +{ + if (ver < 2) + return sizeof(struct user_logger_entry_compat); + else + return sizeof(struct logger_entry); +} + +static ssize_t copy_header_to_user(int ver, struct logger_entry *entry, + char __user *buf) +{ + void *hdr; + size_t hdr_len; + struct user_logger_entry_compat v1; + + if (ver < 2) { + v1.len = entry->len; + v1.__pad = 0; + v1.pid = entry->pid; + v1.tid = entry->tid; + v1.sec = entry->sec; + v1.nsec = entry->nsec; + hdr = &v1; + hdr_len = sizeof(struct user_logger_entry_compat); + } else { + hdr = entry; + hdr_len = sizeof(struct logger_entry); } - return sizeof(struct logger_entry) + val; + return copy_to_user(buf, hdr, hdr_len); } /* @@ -118,15 +174,31 @@ static ssize_t do_read_log_to_user(struct logger_log *log, char __user *buf, size_t count) { + struct logger_entry scratch; + struct logger_entry *entry; size_t len; + size_t msg_start; /* - * We read from the log in two disjoint operations. First, we read from - * the current read head offset up to 'count' bytes or to the end of + * First, copy the header to userspace, using the version of + * the header requested + */ + entry = get_entry_header(log, reader->r_off, &scratch); + if (copy_header_to_user(reader->r_ver, entry, buf)) + return -EFAULT; + + count -= get_user_hdr_len(reader->r_ver); + buf += get_user_hdr_len(reader->r_ver); + msg_start = logger_offset(log, + reader->r_off + sizeof(struct logger_entry)); + + /* + * We read from the msg in two disjoint operations. First, we read from + * the current msg head offset up to 'count' bytes or to the end of * the log, whichever comes first. 
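That two-step copy is the usual ring-buffer read pattern: one chunk up to the end of the backing array, then an optional second chunk from offset zero. A minimal standalone sketch of the same idea (hypothetical names, userspace, no locking):

    #include <stdio.h>
    #include <string.h>

    /* Copy 'count' bytes out of a ring buffer of 'size' bytes starting at
     * offset 'off', in at most two memcpy() calls, the same way
     * do_read_log_to_user() splits the copy when a message wraps. */
    static void ring_read(const unsigned char *buf, size_t size, size_t off,
                          unsigned char *dst, size_t count)
    {
        size_t len = count < size - off ? count : size - off;

        memcpy(dst, buf + off, len);             /* up to the end of the buffer */
        if (count != len)
            memcpy(dst + len, buf, count - len); /* wrapped remainder */
    }

    int main(void)
    {
        unsigned char log[8] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' };
        unsigned char out[4];

        ring_read(log, sizeof(log), 6, out, 4);
        printf("%.4s\n", out);                   /* prints "GHAB" */
        return 0;
    }

In the driver the same split happens twice per entry: once for the header, via get_entry_header() and its scratch copy, and once for the payload, after which r_off is advanced with logger_offset().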
*/ - len = min(count, log->size - reader->r_off); - if (copy_to_user(buf, log->buffer + reader->r_off, len)) + len = min(count, log->size - msg_start); + if (copy_to_user(buf, log->buffer + msg_start, len)) return -EFAULT; /* @@ -137,9 +209,34 @@ static ssize_t do_read_log_to_user(struct logger_log *log, if (copy_to_user(buf + len, log->buffer, count - len)) return -EFAULT; - reader->r_off = logger_offset(reader->r_off + count); + reader->r_off = logger_offset(log, reader->r_off + + sizeof(struct logger_entry) + count); - return count; + return count + get_user_hdr_len(reader->r_ver); +} + +/* + * get_next_entry_by_uid - Starting at 'off', returns an offset into + * 'log->buffer' which contains the first entry readable by 'euid' + */ +static size_t get_next_entry_by_uid(struct logger_log *log, + size_t off, uid_t euid) +{ + while (off != log->w_off) { + struct logger_entry *entry; + struct logger_entry scratch; + size_t next_len; + + entry = get_entry_header(log, off, &scratch); + + if (entry->euid == euid) + return off; + + next_len = sizeof(struct logger_entry) + entry->len; + off = logger_offset(log, off + next_len); + } + + return off; } /* @@ -147,11 +244,11 @@ static ssize_t do_read_log_to_user(struct logger_log *log, * * Behavior: * - * - O_NONBLOCK works - * - If there are no log entries to read, blocks until log is written to - * - Atomically reads exactly one log entry + * - O_NONBLOCK works + * - If there are no log entries to read, blocks until log is written to + * - Atomically reads exactly one log entry * - * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read + * Will set errno to EINVAL if read * buffer is insufficient to hold next entry. */ static ssize_t logger_read(struct file *file, char __user *buf, @@ -164,9 +261,10 @@ static ssize_t logger_read(struct file *file, char __user *buf, start: while (1) { + mutex_lock(&log->mutex); + prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&log->mutex); ret = (log->w_off == reader->r_off); mutex_unlock(&log->mutex); if (!ret) @@ -191,6 +289,10 @@ start: mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + /* is there still something to read or did we race? */ if (unlikely(log->w_off == reader->r_off)) { mutex_unlock(&log->mutex); @@ -198,7 +300,8 @@ start: } /* get the size of the next entry */ - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); if (count < ret) { ret = -EINVAL; goto out; @@ -224,8 +327,9 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) size_t count = 0; do { - size_t nr = get_entry_len(log, off); - off = logger_offset(off + nr); + size_t nr = sizeof(struct logger_entry) + + get_entry_msg_len(log, off); + off = logger_offset(log, off + nr); count += nr; } while (count < len); @@ -233,16 +337,28 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) } /* - * clock_interval - is a < c < b in mod-space? Put another way, does the line - * from a to b cross c? 
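The renamed helper below, is_between(), answers that question while letting all three offsets wrap around the buffer. A small self-contained check of its behaviour (an illustrative mirror, outside kernel context):

    #include <stdio.h>
    #include <stddef.h>

    /* Same test as the is_between() helper below: is c in the half-open
     * circular interval (a, b]? */
    static int is_between(size_t a, size_t b, size_t c)
    {
        if (a < b)
            return a < c && c <= b;   /* ordinary, non-wrapping interval */
        return c <= b || a < c;       /* interval wraps past the buffer end */
    }

    int main(void)
    {
        /* A write moves w_off from 6 to 2 in an 8-byte log, i.e. it wraps.
         * A reader parked at offset 0 is overtaken, one at offset 4 is not. */
        printf("%d %d\n", is_between(6, 2, 0), is_between(6, 2, 4)); /* 1 0 */
        return 0;
    }

fix_up_readers() uses the same predicate to detect readers whose offsets were overtaken by an incoming write and pushes them forward to the next entry.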
+ * is_between - is a < c < b, accounting for wrapping of a, b, and c + * positions in the buffer + * + * That is, if a<b, check for c between a and b, and if a>b, check for c outside (not between) a and b + * + * |------- a xxxxxxxx b --------| + * c^ + * + * |xxxxx b --------- a xxxxxxxxx| + * c^ + * or c^ */ -static inline int clock_interval(size_t a, size_t b, size_t c) +static inline int is_between(size_t a, size_t b, size_t c) { - if (b < a) { - if (a < c || b >= c) + if (a < b) { + /* is c between a and b? */ + if (a < c && c <= b) return 1; } else { - if (a < c && b >= c) + /* is c outside of b through a? */ + if (c <= b || a < c) return 1; } @@ -260,14 +376,14 @@ static inline int clock_interval(size_t a, size_t b, size_t c) static void fix_up_readers(struct logger_log *log, size_t len) { size_t old = log->w_off; - size_t new = logger_offset(old + len); + size_t new = logger_offset(log, old + len); struct logger_reader *reader; - if (clock_interval(old, new, log->head)) + if (is_between(old, new, log->head)) log->head = get_next_entry(log, log->head, len); list_for_each_entry(reader, &log->readers, list) - if (clock_interval(old, new, reader->r_off)) + if (is_between(old, new, reader->r_off)) reader->r_off = get_next_entry(log, reader->r_off, len); } @@ -286,7 +402,7 @@ static void do_write_log(struct logger_log *log, const void *buf, size_t count) if (count != len) memcpy(log->buffer, buf + len, count - len); - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); } @@ -309,9 +425,15 @@ static ssize_t do_write_log_from_user(struct logger_log *log, if (count != len) if (copy_from_user(log->buffer, buf + len, count - len)) + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments.
+ */ return -EFAULT; - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); return count; } @@ -336,7 +458,9 @@ ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.tid = current->pid; header.sec = now.tv_sec; header.nsec = now.tv_nsec; + header.euid = current_euid(); header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); + header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ if (unlikely(!header.len)) @@ -409,6 +533,10 @@ static int logger_open(struct inode *inode, struct file *file) return -ENOMEM; reader->log = log; + reader->r_ver = 1; + reader->r_all = in_egroup_p(inode->i_gid) || + capable(CAP_SYSLOG); + INIT_LIST_HEAD(&reader->list); mutex_lock(&log->mutex); @@ -466,6 +594,10 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) poll_wait(file, &log->wq, wait); mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) ret |= POLLIN | POLLRDNORM; mutex_unlock(&log->mutex); @@ -473,11 +605,25 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) return ret; } +static long logger_set_version(struct logger_reader *reader, void __user *arg) +{ + int version; + if (copy_from_user(&version, arg, sizeof(int))) + return -EFAULT; + + if ((version < 1) || (version > 2)) + return -EINVAL; + + reader->r_ver = version; + return 0; +} + static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct logger_log *log = file_get_log(file); struct logger_reader *reader; - long ret = -ENOTTY; + long ret = -EINVAL; + void __user *argp = (void __user *) arg; mutex_lock(&log->mutex); @@ -502,8 +648,14 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } reader = file->private_data; + + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); else ret = 0; break; @@ -517,6 +669,22 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) log->head = log->w_off; ret = 0; break; + case LOGGER_GET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = reader->r_ver; + break; + case LOGGER_SET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = logger_set_version(reader, argp); + break; } mutex_unlock(&log->mutex); @@ -537,8 +705,8 @@ static const struct file_operations logger_fops = { /* * Defines a log structure with name 'NAME' and a size of 'SIZE' bytes, which - * must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less than - * LONG_MAX minus LOGGER_ENTRY_MAX_LEN. + * must be a power of two, and greater than + * (LOGGER_ENTRY_MAX_PAYLOAD + sizeof(struct logger_entry)). */ #define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \ static unsigned char _buf_ ## VAR[SIZE]; \ diff --git a/drivers/staging/android/logger.h b/drivers/staging/android/logger.h old mode 100644 new mode 100755 index 2cb06e9d..3f612a3b --- a/drivers/staging/android/logger.h +++ b/drivers/staging/android/logger.h @@ -20,7 +20,12 @@ #include #include -struct logger_entry { +/* + * The userspace structure for version 1 of the logger_entry ABI. 
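Version 1 is what existing clients continue to see; a reader that wants the extended version 2 header switches over with the LOGGER_SET_VERSION ioctl handled above. A rough userspace sketch follows; the device path, the error handling, and the local mirror of the v2 structure are assumptions made for the example:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    /* Local mirror of the v2 entry header defined in logger.h below. */
    struct logger_entry_v2 {
        uint16_t len;        /* payload length */
        uint16_t hdr_size;   /* size of this header */
        int32_t  pid, tid;   /* writer's pid and tid */
        int32_t  sec, nsec;  /* timestamp */
        uint32_t euid;       /* effective UID of the writer */
        char     msg[0];     /* payload follows */
    };

    #define __LOGGERIO          0xAE
    #define LOGGER_SET_VERSION  _IO(__LOGGERIO, 6)

    int main(void)
    {
        int fd = open("/dev/log/main", O_RDONLY);  /* assumed device node */
        int ver = 2;
        char buf[5 * 1024];
        ssize_t n;

        if (fd < 0 || ioctl(fd, LOGGER_SET_VERSION, &ver) < 0)
            return 1;                    /* old kernel: reader stays on v1 */

        n = read(fd, buf, sizeof(buf));  /* one whole entry per read() */
        if (n > 0) {
            struct logger_entry_v2 *e = (struct logger_entry_v2 *)buf;
            printf("pid=%d euid=%u len=%u\n", e->pid, e->euid, e->len);
        }
        close(fd);
        return 0;
    }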
+ * This structure is returned to userspace unless the caller requests + * an upgrade to a newer ABI version. + */ +struct user_logger_entry_compat { __u16 len; /* length of the payload */ __u16 __pad; /* no matter what, we get 2 bytes of padding */ __s32 pid; /* generating process's pid */ @@ -30,14 +35,28 @@ struct logger_entry { char msg[0]; /* the entry's payload */ }; +/* + * The structure for version 2 of the logger_entry ABI. + * This structure is returned to userspace if ioctl(LOGGER_SET_VERSION) + * is called with version >= 2 + */ +struct logger_entry { + __u16 len; /* length of the payload */ + __u16 hdr_size; /* sizeof(struct logger_entry_v2) */ + __s32 pid; /* generating process's pid */ + __s32 tid; /* generating process's tid */ + __s32 sec; /* seconds since Epoch */ + __s32 nsec; /* nanoseconds */ + uid_t euid; /* effective UID of logger */ + char msg[0]; /* the entry's payload */ +}; + #define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */ #define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */ #define LOGGER_LOG_SYSTEM "log_system" /* system/framework messages */ #define LOGGER_LOG_MAIN "log_main" /* everything else */ -#define LOGGER_ENTRY_MAX_LEN (4*1024) -#define LOGGER_ENTRY_MAX_PAYLOAD \ - (LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry)) +#define LOGGER_ENTRY_MAX_PAYLOAD 4076 #define __LOGGERIO 0xAE @@ -45,5 +64,7 @@ struct logger_entry { #define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */ #define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */ #define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */ +#define LOGGER_GET_VERSION _IO(__LOGGERIO, 5) /* abi version */ +#define LOGGER_SET_VERSION _IO(__LOGGERIO, 6) /* abi version */ #endif /* _LINUX_LOGGER_H */ diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c old mode 100644 new mode 100755 index 42cd93ea..f8017368 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -29,12 +29,22 @@ * */ -#include #include +#include +#include +#include #include +#include +#include #include #include -#include +#include +#include + +#ifdef CONFIG_SWAP +#include +#include +#endif static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -52,8 +62,16 @@ static size_t lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; +static size_t lowmem_minfree_notif_trigger; + +static unsigned int offlining; static struct task_struct *lowmem_deathpending; -static DEFINE_SPINLOCK(lowmem_deathpending_lock); +static unsigned long lowmem_deathpending_timeout; +static struct kobject *lowmem_kobj; + +#ifdef CONFIG_SWAP +static int fudgeswap = 512; +#endif #define lowmem_print(level, x...) 
\ do { \ @@ -85,12 +103,73 @@ task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; - if (task == lowmem_deathpending) { - schedule_work(&task_free_work); - } + if (task == lowmem_deathpending) + lowmem_deathpending = NULL; + return NOTIFY_OK; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int lmk_hotplug_callback(struct notifier_block *self, + unsigned long cmd, void *data) +{ + switch (cmd) { + /* Don't care LMK cases */ + case MEM_ONLINE: + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + case MEM_GOING_ONLINE: + offlining = 0; + lowmem_print(4, "lmk in normal mode\n"); + break; + /* LMK should account for movable zone */ + case MEM_GOING_OFFLINE: + offlining = 1; + lowmem_print(4, "lmk in hotplug mode\n"); + break; + } + return NOTIFY_DONE; +} +#endif + + + +static void lowmem_notify_killzone_approach(void); + +static inline void get_free_ram(int *other_free, int *other_file) +{ + struct zone *zone; + *other_free = global_page_state(NR_FREE_PAGES); + *other_file = global_page_state(NR_FILE_PAGES) - + global_page_state(NR_SHMEM); +#ifdef CONFIG_SWAP + if(fudgeswap != 0){ + struct sysinfo si; + si_swapinfo(&si); + + if(si.freeswap > 0){ + if(fudgeswap > si.freeswap) + other_file += si.freeswap; + else + other_file += fudgeswap; + } + } +#endif + if (offlining) { + /* Discount all free space in the section being offlined */ + for_each_zone(zone) { + if (zone_idx(zone) == ZONE_MOVABLE) { + *other_free -= zone_page_state(zone, + NR_FREE_PAGES); + lowmem_print(4, "lowmem_shrink discounted " + "%lu pages in movable zone\n", + zone_page_state(zone, NR_FREE_PAGES)); + } + } + } +} + static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) { struct task_struct *p; @@ -102,10 +181,8 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); - int other_free = global_page_state(NR_FREE_PAGES); - int other_file = global_page_state(NR_FILE_PAGES); - unsigned long flags; - + int other_free; + int other_file; /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan @@ -113,15 +190,24 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) * this pass. 
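Past that bail-out, the selection loop further down pairs each lowmem_minfree[] threshold with a lowmem_adj[] value; after this patch both the free-page count and the file-cache count must drop below a threshold before its adj level becomes eligible. A standalone illustration, using what are believed to be the driver's stock defaults (values in 4 KiB pages, an assumption for the example):

    #include <stdio.h>

    /* Believed stock defaults of the driver (pages): 6, 8, 16 and 64 MiB. */
    static int lowmem_adj[4]     = { 0, 1, 6, 12 };
    static int lowmem_minfree[4] = { 3 * 512, 2 * 1024, 4 * 1024, 16 * 1024 };

    /* Mirrors the threshold scan in lowmem_shrink() below: the first level
     * that BOTH counters are under selects the minimum oom_adj to kill. */
    static int pick_min_adj(int other_free, int other_file)
    {
        int i;

        for (i = 0; i < 4; i++)
            if (other_free < lowmem_minfree[i] &&
                other_file < lowmem_minfree[i])
                return lowmem_adj[i];
        return 16;    /* no threshold hit: nothing is eligible */
    }

    int main(void)
    {
        /* roughly 13.7 MiB free and 9.8 MiB of file cache */
        printf("min_adj = %d\n", pick_min_adj(3500, 2500));  /* prints 6 */
        return 0;
    }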
* */ - if (lowmem_deathpending) + if (lowmem_deathpending && + time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; + get_free_ram(&other_free, &other_file); + + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) { + lowmem_notify_killzone_approach(); + } + if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_file < lowmem_minfree[i]) { + if (other_free < lowmem_minfree[i] && + other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } @@ -178,18 +264,13 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) } if (selected) { - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - if (!lowmem_deathpending) { - lowmem_print(1, - "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); - lowmem_deathpending = selected; - task_free_register(&task_nb); - force_sig(SIGKILL, selected); - rem -= selected_tasksize; - } - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); + lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_adj, selected_tasksize); + lowmem_deathpending = selected; + lowmem_deathpending_timeout = jiffies + HZ; + force_sig(SIGKILL, selected); + rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); @@ -202,15 +283,93 @@ static struct shrinker lowmem_shrinker = { .seeks = DEFAULT_SEEKS * 16 }; +static void lowmem_notify_killzone_approach(void) +{ + lowmem_print(3, "notification trigger activated\n"); + sysfs_notify(lowmem_kobj, NULL, "notify_trigger_active"); +} + +static ssize_t lowmem_notify_trigger_active_show(struct kobject *k, + struct kobj_attribute *attr, char *buf) +{ + int other_free, other_file; + get_free_ram(&other_free, &other_file); + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) + return snprintf(buf, 3, "1\n"); + else + return snprintf(buf, 3, "0\n"); +} + +static struct kobj_attribute lowmem_notify_trigger_active_attr = + __ATTR(notify_trigger_active, S_IRUGO, + lowmem_notify_trigger_active_show, NULL); + +static struct attribute *lowmem_default_attrs[] = { + &lowmem_notify_trigger_active_attr.attr, + NULL, +}; + +static ssize_t lowmem_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(k, kobj_attr, buf); +} + +static const struct sysfs_ops lowmem_ops = { + .show = lowmem_show, +}; + +static void lowmem_kobj_release(struct kobject *kobj) +{ + /* Nothing to be done here */ +} + +static struct kobj_type lowmem_kobj_type = { + .release = lowmem_kobj_release, + .sysfs_ops = &lowmem_ops, + .default_attrs = lowmem_default_attrs, +}; + static int __init lowmem_init(void) { + int rc; + task_free_register(&task_nb); register_shrinker(&lowmem_shrinker); +#ifdef CONFIG_MEMORY_HOTPLUG + hotplug_memory_notifier(lmk_hotplug_callback, 0); +#endif + + lowmem_kobj = kzalloc(sizeof(*lowmem_kobj), GFP_KERNEL); + if (!lowmem_kobj) { + rc = -ENOMEM; + goto err; + } + + rc = kobject_init_and_add(lowmem_kobj, &lowmem_kobj_type, + mm_kobj, "lowmemkiller"); + if (rc) + goto err_kobj; + return 0; + +err_kobj: + kfree(lowmem_kobj); + +err: + unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); + + return rc; } static 
void __exit lowmem_exit(void) { + kobject_put(lowmem_kobj); + kfree(lowmem_kobj); unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); } module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); @@ -219,7 +378,12 @@ module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size, module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); +module_param_named(notify_trigger, lowmem_minfree_notif_trigger, uint, + S_IRUGO | S_IWUSR); +#ifdef CONFIG_SWAP +module_param_named(fudgeswap, fudgeswap, int, S_IRUGO | S_IWUSR); +#endif module_init(lowmem_init); module_exit(lowmem_exit); diff --git a/drivers/staging/snappy/Kconfig b/drivers/staging/snappy/Kconfig new file mode 100755 index 00000000..24f69085 --- /dev/null +++ b/drivers/staging/snappy/Kconfig @@ -0,0 +1,5 @@ +config SNAPPY_COMPRESS + tristate "Google Snappy Compression" + +config SNAPPY_DECOMPRESS + tristate "Google Snappy Decompression" diff --git a/drivers/staging/snappy/Makefile b/drivers/staging/snappy/Makefile new file mode 100755 index 00000000..399d070a --- /dev/null +++ b/drivers/staging/snappy/Makefile @@ -0,0 +1,5 @@ +snappy_compress-objs := csnappy_compress.o +snappy_decompress-objs := csnappy_decompress.o + +obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o +obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o diff --git a/drivers/staging/snappy/csnappy.h b/drivers/staging/snappy/csnappy.h new file mode 100755 index 00000000..1e0a54ea --- /dev/null +++ b/drivers/staging/snappy/csnappy.h @@ -0,0 +1,125 @@ +#ifndef __CSNAPPY_H__ +#define __CSNAPPY_H__ +/* +File modified for the Linux Kernel by +Zeev Tarantov +*/ +#ifdef __cplusplus +extern "C" { +#endif + +#define CSNAPPY_VERSION 4 + +#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15 +#define CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) + +/* + * Returns the maximal size of the compressed representation of + * input data that is "source_len" bytes in length; + */ +uint32_t +csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); + +/* + * Flat array compression that does not emit the "uncompressed length" + * prefix. Compresses "input" array to the "output" array. + * + * REQUIRES: "input" is at most 32KiB long. + * REQUIRES: "output" points to an array of memory that is at least + * "csnappy_max_compressed_length(input_length)" in size. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Returns an "end" pointer into "output" buffer. + * "end - output" is the compressed size of "input". + */ +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_length, + char *output, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * REQUIRES: "compressed" must point to an area of memory that is at + * least "csnappy_max_compressed_length(input_length)" bytes in length. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Takes the data stored in "input[0..input_length]" and stores + * it in the array pointed to by "compressed". + * + * "*out_compressed_length" is set to the length of the compressed output. 
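A plausible kernel-side caller of the csnappy_compress() entry point declared next would size the destination with csnappy_max_compressed_length() and pass a full-size work buffer, roughly as in this sketch (hypothetical helper, error handling trimmed):

    #include <linux/slab.h>
    #include <linux/errno.h>
    #include "csnappy.h"

    /* Hypothetical example: compress src[0..src_len) into a freshly
     * allocated buffer, returning the buffer and its compressed length. */
    static int example_compress(const char *src, uint32_t src_len,
                                char **out, uint32_t *out_len)
    {
        void *workmem = kmalloc(CSNAPPY_WORKMEM_BYTES, GFP_KERNEL);
        char *dst = kmalloc(csnappy_max_compressed_length(src_len), GFP_KERNEL);

        if (!workmem || !dst) {
            kfree(workmem);
            kfree(dst);
            return -ENOMEM;
        }
        csnappy_compress(src, src_len, dst, out_len, workmem,
                         CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
        kfree(workmem);
        *out = dst;
        return 0;
    }

The zram glue later in this patch takes the shortcut of calling csnappy_compress_fragment() directly, since a page always fits in a single block.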
+ */ +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *out_compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * Reads header of compressed data to get stored length of uncompressed data. + * REQUIRES: start points to compressed data. + * REQUIRES: n is length of available compressed data. + * + * Returns SNAPPY_E_HEADER_BAD on error. + * Returns number of bytes read from input on success. + * Stores decoded length into *result. + */ +int +csnappy_get_uncompressed_length( + const char *start, + uint32_t n, + uint32_t *result); + +/* + * Safely decompresses all data from array "src" of length "src_len" containing + * entire compressed stream (with header) into array "dst" of size "dst_len". + * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). + * + * Iff sucessful, returns CSNAPPY_E_OK. + * If recorded length in header is greater than dst_len, returns + * CSNAPPY_E_OUTPUT_INSUF. + * If compressed data is malformed, does not write more than dst_len into dst. + */ +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len); + +/* + * Safely decompresses stream src_len bytes long read from src to dst. + * Amount of available space at dst must be provided in *dst_len by caller. + * If compressed stream needs more space, it will not overflow and return + * CSNAPPY_E_OUTPUT_OVERRUN. + * On success, sets *dst_len to actal number of bytes decompressed. + * Iff sucessful, returns CSNAPPY_E_OK. + */ +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_len, + char *dst, + uint32_t *dst_len); + +/* + * Return values (< 0 = Error) + */ +#define CSNAPPY_E_OK 0 +#define CSNAPPY_E_HEADER_BAD (-1) +#define CSNAPPY_E_OUTPUT_INSUF (-2) +#define CSNAPPY_E_OUTPUT_OVERRUN (-3) +#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) +#define CSNAPPY_E_DATA_MALFORMED (-5) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/staging/snappy/csnappy_compress.c b/drivers/staging/snappy/csnappy_compress.c new file mode 100755 index 00000000..7344f772 --- /dev/null +++ b/drivers/staging/snappy/csnappy_compress.c @@ -0,0 +1,497 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +static inline char* +encode_varint32(char *sptr, uint32_t v) +{ + uint8_t* ptr = (uint8_t *)sptr; + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return (char *)ptr; +} + + +/* + * Any hash function will produce a valid compressed bitstream, but a good + * hash function reduces the number of collisions and thus yields better + * compression for compressible input, and more speed for incompressible + * input. Of course, it doesn't hurt if the hash function is reasonably fast + * either, as it gets called a lot. + */ +static inline uint32_t HashBytes(uint32_t bytes, int shift) +{ + uint32_t kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32_t Hash(const char *p, int shift) +{ + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + + +/* + * *** DO NOT CHANGE THE VALUE OF kBlockSize *** + + * New Compression code chops up the input into blocks of at most + * the following size. This ensures that back-references in the + * output never cross kBlockSize block boundaries. This can be + * helpful in implementing blocked decompression. However the + * decompression code should not rely on this guarantee since older + * compression code may not obey it. + */ +#define kBlockLog 15 +#define kBlockSize (1 << kBlockLog) + + +/* + * Return the largest n such that + * + * s1[0,n-1] == s2[0,n-1] + * and n <= (s2_limit - s2). + * + * Does not read *s2_limit or beyond. + * Does not read *(s1 + (s2_limit - s2)) or beyond. + * Requires that s2_limit >= s2. + * + * Separate implementation for x86_64, for speed. Uses the fact that + * x86_64 is little endian. + */ +#if defined(__x86_64__) +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + uint64_t x; + int matched, matching_bits; + DCHECK_GE(s2_limit, s2); + matched = 0; + /* + * Find out how long the match is. We loop over the data 64 bits at a + * time until we find a 64-bit block that doesn't match; then we find + * the first non-matching bit and use that to calculate the total + * length of the match. + */ + while (likely(s2 <= s2_limit - 8)) { + if (unlikely(UNALIGNED_LOAD64(s1 + matched) == + UNALIGNED_LOAD64(s2))) { + s2 += 8; + matched += 8; + } else { + /* + * On current (mid-2008) Opteron models there is a 3% + * more efficient code sequence to find the first + * non-matching byte. 
However, what follows is ~10% + * better on Intel Core 2 and newer, and we expect AMD's + * bsf instruction to improve. + */ + x = UNALIGNED_LOAD64(s1 + matched) ^ + UNALIGNED_LOAD64(s2); + matching_bits = FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (likely(s2 < s2_limit)) { + if (likely(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else /* !defined(__x86_64__) */ +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + /* Implementation based on the x86-64 version, above. */ + int matched = 0; + DCHECK_GE(s2_limit, s2); + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } +#if defined(__LITTLE_ENDIAN) + if (s2 <= s2_limit - 4) { + uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ + UNALIGNED_LOAD32(s2); + int matching_bits = FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } +#else + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } +#endif + return matched; +} +#endif /* !defined(__x86_64__) */ + + +static inline char* +EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) +{ + int n = len - 1; /* Zero-length literals are disallowed */ + if (n < 60) { + /* Fits in tag byte */ + *op++ = LITERAL | (n << 2); + /* + The vast majority of copies are below 16 bytes, for which a + call to memcpy is overkill. This fast path can sometimes + copy up to 15 bytes too much, but that is okay in the + main loop, since we have a bit to go on for both sides: + - The input will always have kInputMarginBytes = 15 extra + available bytes, as long as we're in the main loop, and + if not, allow_fast_path = false. + - The output will always have 32 spare bytes (see + snappy_max_compressed_length). + */ + if (allow_fast_path && len <= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal)); + UNALIGNED_STORE64(op + 8, + UNALIGNED_LOAD64(literal + 8)); + return op + len; + } + } else { + /* Encode in upcoming bytes */ + char *base = op; + int count = 0; + op++; + while (n > 0) { + *op++ = n & 0xff; + n >>= 8; + count++; + } + DCHECK_GE(count, 1); + DCHECK_LE(count, 4); + *base = LITERAL | ((59+count) << 2); + } + memcpy(op, literal, len); + return op + len; +} + +static inline char* +EmitCopyLessThan64(char *op, int offset, int len) +{ + DCHECK_LE(len, 64); + DCHECK_GE(len, 4); + DCHECK_LT(offset, 65536); + + if ((len < 12) && (offset < 2048)) { + int len_minus_4 = len - 4; + DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ + *op++ = COPY_1_BYTE_OFFSET | + ((len_minus_4) << 2) | + ((offset >> 8) << 5); + *op++ = offset & 0xff; + } else { + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); + put_unaligned_le16(offset, op); + op += 2; + } + return op; +} + +static inline char* +EmitCopy(char *op, int offset, int len) +{ + /* Emit 64 byte copies but make sure to keep at least four bytes + * reserved */ + while (len >= 68) { + op = EmitCopyLessThan64(op, offset, 64); + len -= 64; + } + + /* Emit an extra 60 byte copy if have too much data to fit in one + * copy */ + if (len > 64) { + op = EmitCopyLessThan64(op, offset, 60); + len -= 60; + } + + /* Emit remainder */ + op = EmitCopyLessThan64(op, offset, len); + return op; +} + + +/* + * For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will + * equal UNALIGNED_LOAD32(p + offset). 
Motivation: On x86-64 hardware we have + * empirically found that overlapping loads such as + * UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) + * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32_t. + */ +static inline uint32_t +GetUint32AtOffset(uint64_t v, int offset) +{ + DCHECK(0 <= offset && offset <= 4); +#ifdef __LITTLE_ENDIAN + return v >> (8 * offset); +#else + return v >> (32 - 8 * offset); +#endif +} + +#define kInputMarginBytes 15 +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_size, + char *op, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, + *candidate, *base; + uint16_t *table = (uint16_t *)working_memory; + uint64_t input_bytes; + uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; + int shift, matched; + + DCHECK_GE(workmem_bytes_power_of_two, 9); + DCHECK_LE(workmem_bytes_power_of_two, 15); + /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. + * How many bits of 32bit hash function result are discarded? */ + shift = 33 - workmem_bytes_power_of_two; + /* "ip" is the input pointer, and "op" is the output pointer. */ + ip = input; + DCHECK_LE(input_size, kBlockSize); + ip_end = input + input_size; + base_ip = ip; + /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or + [next_emit, ip_end) after the main loop. */ + next_emit = ip; + + if (unlikely(input_size < kInputMarginBytes)) + goto emit_remainder; + + memset(working_memory, 0, 1 << workmem_bytes_power_of_two); + + ip_limit = input + input_size - kInputMarginBytes; + next_hash = Hash(++ip, shift); + +main_loop: + DCHECK_LT(next_emit, ip); + /* + * The body of this loop calls EmitLiteral once and then EmitCopy one or + * more times. (The exception is that when we're close to exhausting + * the input we goto emit_remainder.) + * + * In the first iteration of this loop we're just starting, so + * there's nothing to copy, so calling EmitLiteral once is + * necessary. And we only start a new iteration when the + * current iteration has determined that a call to EmitLiteral will + * precede the next call to EmitCopy (if any). + * + * Step 1: Scan forward in the input looking for a 4-byte-long match. + * If we get close to exhausting the input then goto emit_remainder. + * + * Heuristic match skipping: If 32 bytes are scanned with no matches + * found, start looking only at every other byte. If 32 more bytes are + * scanned, look at every third byte, etc.. When a match is found, + * immediately go back to looking at every byte. This is a small loss + * (~5% performance, ~0.1% density) for compressible data due to more + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge + * win since the compressor quickly "realizes" the data is incompressible + * and doesn't bother looking for matches everywhere. + * + * The "skip" variable keeps track of how many bytes there are since the + * last match; dividing it by 32 (ie. right-shifting by five) gives the + * number of bytes to move ahead for each iteration. 
+ */ + skip = 32; + + next_ip = ip; + do { + ip = next_ip; + hash = next_hash; + DCHECK_EQ(hash, Hash(ip, shift)); + next_ip = ip + (skip++ >> 5); + if (unlikely(next_ip > ip_limit)) + goto emit_remainder; + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + DCHECK_GE(candidate, base_ip); + DCHECK_LT(candidate, ip); + + table[hash] = ip - base_ip; + } while (likely(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + + /* + * Step 2: A 4-byte match has been found. We'll later see if more + * than 4 bytes match. But, prior to the match, input + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + */ + DCHECK_LE(next_emit + 16, ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit, 1); + + /* + * Step 3: Call EmitCopy, and then see if another EmitCopy could + * be our next move. Repeat until we find no match for the + * input immediately after what was consumed by the last EmitCopy call. + * + * If we exit this loop normally then we need to call EmitLiteral next, + * though we don't yet know how big the literal will be. We handle that + * by proceeding to the next iteration of the main loop. We also can exit + * this loop via goto if we get close to exhausting the input. + */ + input_bytes = 0; + candidate_bytes = 0; + + do { + /* We have a 4-byte match at ip, and no need to emit any + "literal bytes" prior to ip. */ + base = ip; + matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); + ip += matched; + DCHECK_EQ(0, memcmp(base, candidate, matched)); + op = EmitCopy(op, base - candidate, matched); + /* We could immediately start working at ip now, but to improve + compression we first update table[Hash(ip - 1, ...)]. */ + next_emit = ip; + if (unlikely(ip >= ip_limit)) + goto emit_remainder; + input_bytes = UNALIGNED_LOAD64(ip - 1); + prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + goto main_loop; + +emit_remainder: + /* Emit the remaining bytes as a literal */ + if (next_emit < ip_end) + op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); + + return op; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress_fragment); +#endif + +uint32_t __attribute__((const)) +csnappy_max_compressed_length(uint32_t source_len) +{ + return 32 + source_len + source_len/6; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_max_compressed_length); +#endif + +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + int workmem_size; + int num_to_read; + uint32_t written = 0; + char *p = encode_varint32(compressed, input_length); + written += (p - compressed); + compressed = p; + while (input_length > 0) { + num_to_read = min(input_length, (uint32_t)kBlockSize); + workmem_size = workmem_bytes_power_of_two; + if (num_to_read < kBlockSize) { + for (workmem_size = 9; + workmem_size < workmem_bytes_power_of_two; + ++workmem_size) { + if ((1 << (workmem_size-1)) >= num_to_read) + break; + } + } + p = csnappy_compress_fragment( + input, num_to_read, compressed, + working_memory, workmem_size); + 
written += (p - compressed); + compressed = p; + input_length -= num_to_read; + input += num_to_read; + } + *compressed_length = written; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Compressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_decompress.c b/drivers/staging/snappy/csnappy_decompress.c new file mode 100755 index 00000000..d05d8173 --- /dev/null +++ b/drivers/staging/snappy/csnappy_decompress.c @@ -0,0 +1,321 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ +static const uint32_t wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +/* + * Data stored per entry in lookup table: + * Range Bits-used Description + * ------------------------------------ + * 1..64 0..7 Literal/copy length encoded in opcode byte + * 0..7 8..10 Copy offset encoded in opcode byte / 256 + * 0..4 11..13 Extra bytes after opcode + * + * We use eight bits for the length even though 7 would have sufficed + * because of efficiency reasons: + * (1) Extracting a byte is faster than a bit-field + * (2) It properly aligns copy offset so we do not need a <<8 + */ +static const uint16_t char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +/* + * Copy "len" bytes from "src" to "op", one byte at a time. Used for + * handling COPY operations where the input and output regions may + * overlap. For example, suppose: + * src == "ab" + * op == src + 2 + * len == 20 + * After IncrementalCopy(src, op, len), the result will have + * eleven copies of "ab" + * ababababababababababab + * Note that this does not match the semantics of either memcpy() + * or memmove(). 
+ */ +static inline void IncrementalCopy(const char *src, char *op, int len) +{ + DCHECK_GT(len, 0); + do { + *op++ = *src++; + } while (--len > 0); +} + +/* + * Equivalent to IncrementalCopy except that it can write up to ten extra + * bytes after the end of the copy, and that it is faster. + * + * The main part of this loop is a simple copy of eight bytes at a time until + * we've copied (at least) the requested amount of bytes. However, if op and + * src are less than eight bytes apart (indicating a repeating pattern of + * length < 8), we first need to expand the pattern in order to get the correct + * results. For instance, if the buffer looks like this, with the eight-byte + * <src> and <op> patterns marked as intervals: + * + * abxxxxxxxxxxxx + * [------] src + * [------] op + * + * a single eight-byte copy from <src> to <op> will repeat the pattern once, + * after which we can move <op> two bytes without moving <src>: + * + * ababxxxxxxxxxx + * [------] src + * [------] op + * + * and repeat the exercise until the two no longer overlap. + * + * This allows us to do very well in the special case of one single byte + * repeated many times, without taking a big hit for more general cases. + * + * The worst case of extra writing past the end of the match occurs when + * op - src == 1 and len == 1; the last copy will read from byte positions + * [0..7] and write to [4..11], whereas it was only supposed to write to + * position 1. Thus, ten excess bytes. + */ +static const int kMaxIncrementCopyOverflow = 10; +static inline void IncrementalCopyFastPath(const char *src, char *op, int len) +{ + while (op - src < 8) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + len -= op - src; + op += op - src; + } + while (len > 0) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + src += 8; + op += 8; + len -= 8; + } +} + + +/* A type that writes to a flat array. */ +struct SnappyArrayWriter { + char *base; + char *op; + char *op_limit; +}; + +static inline int +SAW__Append(struct SnappyArrayWriter *this, + const char *ip, uint32_t len, int allow_fast_path) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /*Fast path, used for the majority (about 90%) of dynamic invocations.*/ + if (allow_fast_path && len <= 16 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + memcpy(op, ip, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + +static inline int +SAW__AppendFromSelf(struct SnappyArrayWriter *this, + uint32_t offset, uint32_t len) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /* -1u catches offset==0 */ + if (op - this->base <= offset - 1u) + return CSNAPPY_E_DATA_MALFORMED; + /* Fast path, used for the majority (70-80%) of dynamic invocations.
*/ + if (len <= 16 && offset >= 8 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); + } else if (space_left >= len + kMaxIncrementCopyOverflow) { + IncrementalCopyFastPath(op - offset, op, len); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + IncrementalCopy(op - offset, op, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + + +int +csnappy_get_uncompressed_length( + const char *src, + uint32_t src_len, + uint32_t *result) +{ + const char *src_base = src; + uint32_t shift = 0; + uint8_t c; + /* Length is encoded in 1..5 bytes */ + *result = 0; + for (;;) { + if (shift >= 32) + goto err_out; + if (src_len == 0) + goto err_out; + c = *(const uint8_t *)src++; + src_len -= 1; + *result |= (uint32_t)(c & 0x7f) << shift; + if (c < 128) + break; + shift += 7; + } + return src - src_base; +err_out: + return CSNAPPY_E_HEADER_BAD; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_get_uncompressed_length); +#endif + +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_remaining, + char *dst, + uint32_t *dst_len) +{ + struct SnappyArrayWriter writer; + uint32_t length, trailer, opword, extra_bytes; + int ret; + uint8_t opcode; + char scratch[5]; + writer.op = writer.base = dst; + writer.op_limit = writer.op + *dst_len; + while (src_remaining) { + if (unlikely(src_remaining < 5)) { + memcpy(scratch, src, src_remaining); + src = scratch; + } + opcode = *(const uint8_t *)src++; + opword = char_table[opcode]; + extra_bytes = opword >> 11; + trailer = get_unaligned_le32(src) & wordmask[extra_bytes]; + src += extra_bytes; + src_remaining -= 1 + extra_bytes; + length = opword & 0xff; + if (opcode & 0x3) { + trailer += opword & 0x700; + ret = SAW__AppendFromSelf(&writer, trailer, length); + if (ret < 0) + return ret; + } else { + length += trailer; + if (unlikely(src_remaining < length)) + return CSNAPPY_E_DATA_MALFORMED; + ret = src_remaining >= 16; + ret = SAW__Append(&writer, src, length, ret); + if (ret < 0) + return ret; + src += length; + src_remaining -= length; + } + } + *dst_len = writer.op - writer.base; + return CSNAPPY_E_OK; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress_noheader); +#endif + +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len) +{ + int n; + uint32_t olen = 0; + /* Read uncompressed length from the front of the compressed input */ + n = csnappy_get_uncompressed_length(src, src_len, &olen); + if (unlikely(n < CSNAPPY_E_OK)) + return n; + /* Protect against possible DoS attack */ + if (unlikely(olen > dst_len)) + return CSNAPPY_E_OUTPUT_INSUF; + return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Decompressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_internal.h b/drivers/staging/snappy/csnappy_internal.h new file mode 100755 index 00000000..6f1a5465 --- /dev/null +++ b/drivers/staging/snappy/csnappy_internal.h @@ -0,0 +1,83 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Various stubs for the open-source version of Snappy. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#ifndef CSNAPPY_INTERNAL_H_ +#define CSNAPPY_INTERNAL_H_ + +#ifndef __KERNEL__ +#include "csnappy_internal_userspace.h" +#else + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DCHECK(cond) if (!(cond)) \ + printk(KERN_DEBUG "assert failed @ %s:%i\n", \ + __FILE__, __LINE__) +#else +#define DCHECK(cond) +#endif + +#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) +#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) +#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) +#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) +#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) +#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) + +#define FindLSBSetNonZero(n) __builtin_ctz(n) +#define FindLSBSetNonZero64(n) __builtin_ctzll(n) + +#endif /* __KERNEL__ */ + +#define DCHECK_EQ(a, b) DCHECK(((a) == (b))) +#define DCHECK_NE(a, b) DCHECK(((a) != (b))) +#define DCHECK_GT(a, b) DCHECK(((a) > (b))) +#define DCHECK_GE(a, b) DCHECK(((a) >= (b))) +#define DCHECK_LT(a, b) DCHECK(((a) < (b))) +#define DCHECK_LE(a, b) DCHECK(((a) <= (b))) + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; + +#endif /* CSNAPPY_INTERNAL_H_ */ diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig old mode 100644 new mode 100755 index 18c1a971..24027b43 --- a/drivers/staging/zram/Kconfig +++ b/drivers/staging/zram/Kconfig @@ -6,8 +6,6 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). @@ -21,23 +19,6 @@ config ZRAM See zram.txt for more information. Project home: http://compcache.googlecode.com/ -config ZRAM_NUM_DEVICES - int "Default number of zram devices" - depends on ZRAM - range 1 32 - default 1 - help - Select default number of zram devices. You can override this value - using 'num_devices' module parameter. 
- -config ZRAM_DEFAULT_PERCENTAGE - int "Default number of zram percentage" - depends on ZRAM - range 10 80 - default 25 - help - Select default zram disk size: percentage of total RAM - config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM @@ -45,11 +26,15 @@ config ZRAM_DEBUG help This option adds additional debugging code to the compressed RAM block device driver. +config ZRAM_LZO + bool "LZO compression" + default y + depends on ZRAM + select LZO_COMPRESS + select LZO_DECOMPRESS -config ZRAM_DEFAULT_DISKSIZE - int "Default size of zram in bytes" - depends on ZRAM - default 100663296 - help - Set default zram disk size (default ~ 96MB) - +config ZRAM_SNAPPY + bool "Snappy compression" + depends on ZRAM + select SNAPPY_COMPRESS + select SNAPPY_DECOMPRESS diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.h b/drivers/staging/zram/xvmalloc.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c old mode 100644 new mode 100755 index fc4c2e6f..c61261ac --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -29,12 +29,90 @@ #include #include #include -#include #include #include #include "zram_drv.h" +#if defined(CONFIG_ZRAM_LZO) +#include +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor lzo_compressor = { + .name = "LZO", + .workmem_bytes = LZO1X_MEM_COMPRESS, + .compress = &lzo1x_1_compress, + .decompress = &lzo1x_decompress_safe +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE LZO1X_MEM_COMPRESS +#define COMPRESS(s, sl, d, dl, wm) \ + lzo1x_1_compress(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + lzo1x_decompress_safe(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_LZO) */ + +#if defined(CONFIG_ZRAM_SNAPPY) +#include "../snappy/csnappy.h" /* if built in drivers/staging */ +#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? 
(15) : (PAGE_SHIFT+1)) +static int +snappy_compress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem) +{ + const unsigned char *end = csnappy_compress_fragment( + src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER); + *dst_len = end - dst; + return 0; +} +static int +snappy_decompress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len) +{ + uint32_t dst_len_ = (uint32_t)*dst_len; + int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_); + *dst_len = (size_t)dst_len_; + return ret; +} +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor snappy_compressor = { + .name = "SNAPPY", + .workmem_bytes = (1 << WMSIZE_ORDER), + .compress = &snappy_compress_, + .decompress = &snappy_decompress_ +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE (1 << WMSIZE_ORDER) +#define COMPRESS(s, sl, d, dl, wm) \ + snappy_compress_(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + snappy_decompress_(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_SNAPPY) */ + +#ifdef MULTIPLE_COMPRESSORS +const struct zram_compressor * const zram_compressors[] = { +#if defined(CONFIG_ZRAM_LZO) + &lzo_compressor, +#endif +#if defined(CONFIG_ZRAM_SNAPPY) + &snappy_compressor, +#endif + NULL +}; +#define WMSIZE (zram->compressor->workmem_bytes) +#define COMPRESS(s, sl, d, dl, wm) \ + (zram->compressor->compress(s, sl, d, dl, wm)) +#define DECOMPRESS(s, sl, d, dl) \ + (zram->compressor->decompress(s, sl, d, dl)) +#endif /* MULTIPLE_COMPRESSORS */ + /* Globals */ static int zram_major; struct zram *zram_devices; @@ -104,19 +182,33 @@ static int page_zero_filled(void *ptr) return 1; } -static u64 zram_default_disksize_bytes(void) +static void zram_set_disksize(struct zram *zram, size_t totalram_bytes) { -#if 0 - return ((totalram_pages << PAGE_SHIFT) * - default_disksize_perc_ram / 100) & PAGE_MASK; -#endif - return CONFIG_ZRAM_DEFAULT_DISKSIZE; -} + if (!zram->disksize) { + pr_info( + "disk size not provided. You can use disksize_kb module " + "param to specify size.\nUsing default: (%u%% of RAM).\n", + default_disksize_perc_ram + ); + zram->disksize = default_disksize_perc_ram * + (totalram_bytes / 100); + } -static void zram_set_disksize(struct zram *zram, u64 size_bytes) -{ - zram->disksize = size_bytes; - set_capacity(zram->disk, size_bytes >> SECTOR_SHIFT); + if (zram->disksize > 2 * (totalram_bytes)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %zu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + totalram_bytes >> 10, zram->disksize + ); + } + + zram->disksize &= PAGE_MASK; } static void zram_free_page(struct zram *zram, size_t index) @@ -243,7 +335,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, cmem = kmap_atomic(zram->table[index].page, KM_USER1) + zram->table[index].offset; - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), uncmem, &clen); @@ -257,7 +349,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(user_mem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -291,13 +383,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index) return 0; } - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), mem, &clen); kunmap_atomic(cmem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -363,18 +455,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + COMPRESS(uncmem, PAGE_SIZE, src, &clen, zram->compress_workmem); kunmap_atomic(user_mem, KM_USER0); if (is_partial_io(bvec)) kfree(uncmem); - if (unlikely(ret != LZO_E_OK)) { - pr_err("Compression failed! err=%d\n", ret); - goto out; - } - /* * Page is incompressible. Store it as-is (uncompressed) * since we do not want to return too many disk write @@ -546,27 +633,35 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; + if (unlikely(!zram->init_done) && zram_init_device(zram)) + goto error; + + down_read(&zram->init_lock); + if (unlikely(!zram->init_done)) + goto error_unlock; + if (!valid_io_request(zram, bio)) { zram_stat64_inc(zram, &zram->stats.invalid_io); - bio_io_error(bio); - return 0; - } - - if (unlikely(!zram->init_done) && zram_init_device(zram)) { - bio_io_error(bio); - return 0; + goto error_unlock; } __zram_make_request(zram, bio, bio_data_dir(bio)); + up_read(&zram->init_lock); return 0; + +error_unlock: + up_read(&zram->init_lock); +error: + bio_io_error(bio); + return 0; + } -void zram_reset_device(struct zram *zram) +void __zram_reset_device(struct zram *zram) { size_t index; - mutex_lock(&zram->init_lock); zram->init_done = 0; /* Free various per-device buffers */ @@ -602,8 +697,14 @@ void zram_reset_device(struct zram *zram) /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); - zram_set_disksize(zram, zram_default_disksize_bytes()); - mutex_unlock(&zram->init_lock); + zram->disksize = 0; +} + +void zram_reset_device(struct zram *zram) +{ + down_write(&zram->init_lock); + __zram_reset_device(zram); + up_write(&zram->init_lock); } int zram_init_device(struct zram *zram) @@ -611,37 +712,39 @@ int zram_init_device(struct zram *zram) int ret; size_t num_pages; - mutex_lock(&zram->init_lock); + down_write(&zram->init_lock); if (zram->init_done) { - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); return 0; } - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); + + zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL); if (!zram->compress_workmem) { pr_err("Error allocating compressor working memory!\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } - zram->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); + zram->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (!zram->compress_buffer) { pr_err("Error allocating compressor buffer space\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } num_pages = zram->disksize >> PAGE_SHIFT; zram->table = vmalloc(num_pages * sizeof(*zram->table)); if (!zram->table) { pr_err("Error allocating zram address table\n"); - /* To prevent accessing 
table entries during cleanup */ - zram->disksize = 0; ret = -ENOMEM; - goto fail; + goto fail_no_table; } - memset(zram->table, 0, num_pages * sizeof(*zram->table)); + memset(zram->table, 0, num_pages * sizeof(*zram->table)); + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); @@ -654,15 +757,17 @@ int zram_init_device(struct zram *zram) } zram->init_done = 1; - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); pr_debug("Initialization done!\n"); return 0; +fail_no_table: + /* To prevent accessing table entries during cleanup */ + zram->disksize = 0; fail: - mutex_unlock(&zram->init_lock); - zram_reset_device(zram); - + __zram_reset_device(zram); + up_write(&zram->init_lock); pr_err("Initialization failed: err=%d\n", ret); return ret; } @@ -687,7 +792,7 @@ static int create_device(struct zram *zram, int device_id) int ret = 0; init_rwsem(&zram->lock); - mutex_init(&zram->init_lock); + init_rwsem(&zram->init_lock); spin_lock_init(&zram->stat64_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); @@ -718,13 +823,13 @@ static int create_device(struct zram *zram, int device_id) zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - /* - * Set some default disksize. To set another disksize, user - * must reset the device and then write a new disksize to - * corresponding device's sysfs node. - */ - zram_set_disksize(zram, zram_default_disksize_bytes()); + /* Actual capacity set using syfs (/sys/block/zram/disksize */ + set_capacity(zram->disk, 0); + /* Can be changed using sysfs (/sys/block/zram/compressor) */ +#ifdef MULTIPLE_COMPRESSORS + zram->compressor = zram_compressors[0]; +#endif /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. @@ -768,13 +873,6 @@ static int __init zram_init(void) { int ret, dev_id; - /* - * Module parameter not specified by user. Use default - * value as defined during kernel config. - */ - if (zram_num_devices == 0) - zram_num_devices = CONFIG_ZRAM_NUM_DEVICES; - if (zram_num_devices > max_num_devices) { pr_warning("Invalid value for num_devices: %u\n", zram_num_devices); @@ -789,12 +887,15 @@ static int __init zram_init(void) goto out; } + if (!zram_num_devices) { + pr_info("num_devices not specified. 
Using default: 1\n"); + zram_num_devices = 1; + } + /* Allocate the device array and initialize each one */ pr_info("Creating %u devices ...\n", zram_num_devices); - zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), - GFP_KERNEL); - if (!zram_devices) - { + zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), GFP_KERNEL); + if (!zram_devices) { ret = -ENOMEM; goto unregister; } @@ -836,8 +937,8 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param_named(num_devices, zram_num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); +module_param(zram_num_devices, uint, 0); +MODULE_PARM_DESC(zram_num_devices, "Number of zram devices"); module_init(zram_init); module_exit(zram_exit); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h old mode 100644 new mode 100755 index fed0d14b..6ccb855b --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -41,7 +41,7 @@ struct zobj_header { /*-- Configurable parameters */ /* Default zram disk size: 25% of total RAM */ -static const unsigned default_disksize_perc_ram = CONFIG_ZRAM_DEFAULT_PERCENTAGE; +static const unsigned default_disksize_perc_ram = 25; /* * Pages that compress to size greater than this are stored @@ -66,6 +66,13 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) == 0 +#error At least one of CONFIG_ZRAM_LZO, CONFIG_ZRAM_SNAPPY must be defined! +#endif +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) > 1 +#define MULTIPLE_COMPRESSORS +#endif + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* Page is stored uncompressed */ @@ -103,6 +110,9 @@ struct zram_stats { struct zram { struct xv_pool *mem_pool; +#ifdef MULTIPLE_COMPRESSORS + const struct zram_compressor *compressor; +#endif void *compress_workmem; void *compress_buffer; struct table *table; @@ -112,8 +122,8 @@ struct zram { struct request_queue *queue; struct gendisk *disk; int init_done; - /* Prevent concurrent execution of device init and reset */ - struct mutex init_lock; + /* Prevent concurrent execution of device init, reset and R/W request */ + struct rw_semaphore init_lock; /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk. 
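The zram changes above replace the hard-wired LZO calls with a small compressor abstraction: when both CONFIG_ZRAM_LZO and CONFIG_ZRAM_SNAPPY are enabled, MULTIPLE_COMPRESSORS is defined, zram_compressors[] holds a NULL-terminated table of ops structures, and the sysfs compressor attribute walks that table by index before the device is initialised. The following self-contained C sketch shows the same pattern under assumed names (compressor_ops, pick_compressor, fake_compress are illustrative, not the driver's symbols); pick_compressor() mirrors the index walk in compressor_store().

/* Sketch of the pluggable-compressor pattern used by the patched driver. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct compressor_ops {
	const char *name;
	size_t workmem_bytes;
	int (*compress)(const unsigned char *src, size_t src_len,
			unsigned char *dst, size_t *dst_len, void *workmem);
};

static int fake_compress(const unsigned char *src, size_t src_len,
			 unsigned char *dst, size_t *dst_len, void *workmem)
{
	(void)workmem;
	memcpy(dst, src, src_len);	/* stand-in for lzo1x_1_compress/csnappy */
	*dst_len = src_len;
	return 0;
}

static const struct compressor_ops lzo_ops = { "LZO", 16384, fake_compress };
static const struct compressor_ops snappy_ops = { "SNAPPY", 32768, fake_compress };

static const struct compressor_ops *const compressors[] = {
	&lzo_ops, &snappy_ops, NULL
};

/* Walk the table until the requested index, as compressor_store() does. */
static const struct compressor_ops *pick_compressor(unsigned int requested)
{
	unsigned int i = 0;
	const struct compressor_ops *p = compressors[i];

	while (p && i < requested)
		p = compressors[++i];
	return p;	/* NULL if the index is out of range */
}

int main(void)
{
	const struct compressor_ops *c = pick_compressor(1);

	if (c)
		printf("selected %s (workmem %zu bytes)\n", c->name, c->workmem_bytes);
	return 0;
}
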
@@ -130,7 +140,27 @@ extern struct attribute_group zram_disk_attr_group; #endif extern int zram_init_device(struct zram *zram); -extern void zram_reset_device(struct zram *zram); +extern void __zram_reset_device(struct zram *zram); + +#ifdef MULTIPLE_COMPRESSORS +struct zram_compressor { + const char *name; + int (*compress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem); + int (*decompress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len); + unsigned workmem_bytes; +}; + +extern const struct zram_compressor * const zram_compressors[]; +#endif #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c old mode 100644 new mode 100755 index d894928a..231924e0 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -55,23 +55,78 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; + u64 disksize; struct zram *zram = dev_to_zram(dev); + ret = strict_strtoull(buf, 10, &disksize); + if (ret) + return ret; + + down_write(&zram->init_lock); if (zram->init_done) { + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); return -EBUSY; } - ret = strict_strtoull(buf, 10, &zram->disksize); - if (ret) - return ret; - - zram->disksize = PAGE_ALIGN(zram->disksize); + zram->disksize = PAGE_ALIGN(disksize); set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + up_write(&zram->init_lock); return len; } +#ifdef MULTIPLE_COMPRESSORS +static ssize_t compressor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char * const buf_base = buf; + const struct zram_compressor *p, *curr; + unsigned int i = 0; + struct zram *zram = dev_to_zram(dev); + curr = zram->compressor; + p = zram_compressors[i]; + while (p) { + if (curr == p) + buf += sprintf(buf, "*"); + buf += sprintf(buf, "%u - %s\n", i, p->name); + p = zram_compressors[++i]; + } + return buf - buf_base; +} + +static ssize_t compressor_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + const struct zram_compressor *p; + unsigned long requested; + unsigned int i = 0; + int ret; + struct zram *zram = dev_to_zram(dev); + + if (zram->init_done) { + pr_info("Cannot change compressor for initialized device\n"); + return -EBUSY; + } + + ret = strict_strtoul(buf, 10, &requested); + if (ret) + return ret; + + p = zram_compressors[i]; + while (p && (i < requested)) + p = zram_compressors[++i]; + + if (!p) { + pr_info("No compressor with index #%lu\n", requested); + return -EINVAL; + } + + zram->compressor = p; + return len; +} +#endif /* MULTIPLE_COMPRESSORS */ + static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -106,8 +161,10 @@ static ssize_t reset_store(struct device *dev, if (bdev) fsync_bdev(bdev); + down_write(&zram->init_lock); if (zram->init_done) - zram_reset_device(zram); + __zram_reset_device(zram); + up_write(&zram->init_lock); return len; } @@ -188,6 +245,10 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +#ifdef MULTIPLE_COMPRESSORS +static DEVICE_ATTR(compressor, S_IRUGO | S_IWUSR, + compressor_show, compressor_store); +#endif static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); @@ -202,6 +263,9 @@ static DEVICE_ATTR(compr_data_size, S_IRUGO, 
compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static struct attribute *zram_disk_attrs[] = { +#ifdef MULTIPLE_COMPRESSORS + &dev_attr_compressor.attr, +#endif &dev_attr_disksize.attr, &dev_attr_initstate.attr, &dev_attr_reset.attr, diff --git a/drivers/video/msm/msm_fb.c b/drivers/video/msm/msm_fb.c index 82e110ce..028a1c7c 100644 --- a/drivers/video/msm/msm_fb.c +++ b/drivers/video/msm/msm_fb.c @@ -992,7 +992,7 @@ static void setup_fb_info(struct msmfb_info *msmfb) int r; /* finish setting up the fb_info struct */ - strncpy(fb_info->fix.id, "msmfb", 16); + strncpy(fb_info->fix.id, "msmfb31_0", 16); fb_info->fix.ypanstep = 1; fb_info->fbops = &msmfb_ops; diff --git a/include/linux/capability.h b/include/linux/capability.h old mode 100644 new mode 100755 index c8f2a5f7..c4f6d94d --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -357,7 +357,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/genlock.h b/include/linux/genlock.h old mode 100644 new mode 100755 index 2e9f9d68..587c49df --- a/include/linux/genlock.h +++ b/include/linux/genlock.h @@ -12,7 +12,7 @@ void genlock_put_handle(struct genlock_handle *handle); struct genlock *genlock_create_lock(struct genlock_handle *); struct genlock *genlock_attach_lock(struct genlock_handle *, int fd); int genlock_wait(struct genlock_handle *handle, u32 timeout); -void genlock_release_lock(struct genlock_handle *); +/* genlock_release_lock was deprecated */ int genlock_lock(struct genlock_handle *handle, int op, int flags, u32 timeout); #endif @@ -21,7 +21,8 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, #define GENLOCK_WRLOCK 1 #define GENLOCK_RDLOCK 2 -#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_WRITE_TO_READ (1 << 1) struct genlock_lock { int fd; @@ -37,9 +38,15 @@ struct genlock_lock { struct genlock_lock) #define GENLOCK_IOC_ATTACH _IOW(GENLOCK_IOC_MAGIC, 2, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_LOCK _IOW(GENLOCK_IOC_MAGIC, 3, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_RELEASE _IO(GENLOCK_IOC_MAGIC, 4) #define GENLOCK_IOC_WAIT _IOW(GENLOCK_IOC_MAGIC, 5, \ struct genlock_lock) +#define GENLOCK_IOC_DREADLOCK _IOW(GENLOCK_IOC_MAGIC, 6, \ + struct genlock_lock) #endif diff --git a/include/linux/ion.h b/include/linux/ion.h new file mode 100755 index 00000000..4b7b8b7d --- /dev/null +++ b/include/linux/ion.h @@ -0,0 +1,548 @@ +/* + * include/linux/ion.h + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _LINUX_ION_H +#define _LINUX_ION_H + +#include +#include + + +struct ion_handle; +/** + * enum ion_heap_types - list of all possible types of heaps + * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc + * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc + * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved + * carveout heap, allocations are physically + * contiguous + * @ION_HEAP_END: helper for iterating over heaps + */ +enum ion_heap_type { + ION_HEAP_TYPE_SYSTEM, + ION_HEAP_TYPE_SYSTEM_CONTIG, + ION_HEAP_TYPE_CARVEOUT, + ION_HEAP_TYPE_CUSTOM, /* must be last so device specific heaps always + are at the end of this enum */ + ION_NUM_HEAPS, +}; + +#define ION_HEAP_SYSTEM_MASK (1 << ION_HEAP_TYPE_SYSTEM) +#define ION_HEAP_SYSTEM_CONTIG_MASK (1 << ION_HEAP_TYPE_SYSTEM_CONTIG) +#define ION_HEAP_CARVEOUT_MASK (1 << ION_HEAP_TYPE_CARVEOUT) + + +/** + * These are the only ids that should be used for Ion heap ids. + * The ids listed are the order in which allocation will be attempted + * if specified. Don't swap the order of heap ids unless you know what + * you are doing! + */ + +enum ion_heap_ids { + ION_HEAP_SYSTEM_ID, + ION_HEAP_SYSTEM_CONTIG_ID, + ION_HEAP_EBI_ID, + ION_HEAP_SMI_ID, + ION_HEAP_ADSP_ID, + ION_HEAP_AUDIO_ID, +}; + +#define ION_KMALLOC_HEAP_NAME "kmalloc" +#define ION_VMALLOC_HEAP_NAME "vmalloc" +#define ION_EBI1_HEAP_NAME "EBI1" +#define ION_ADSP_HEAP_NAME "adsp" +#define ION_SMI_HEAP_NAME "smi" + +#define CACHED 1 +#define UNCACHED 0 + +#define ION_CACHE_SHIFT 0 + +#define ION_SET_CACHE(__cache) ((__cache) << ION_CACHE_SHIFT) + +#define ION_IS_CACHED(__flags) ((__flags) & (1 << ION_CACHE_SHIFT)) + +#ifdef __KERNEL__ +#include +#include +struct ion_device; +struct ion_heap; +struct ion_mapper; +struct ion_client; +struct ion_buffer; + +/* This should be removed some day when phys_addr_t's are fully + plumbed in the kernel, and all instances of ion_phys_addr_t should + be converted to phys_addr_t. For the time being many kernel interfaces + do not accept phys_addr_t's that would have to */ +#define ion_phys_addr_t unsigned long + +/** + * struct ion_platform_heap - defines a heap in the given platform + * @type: type of the heap from ion_heap_type enum + * @id: unique identifier for heap. When allocating (lower numbers + * will be allocated from first) + * @name: used for debug purposes + * @base: base address of heap in physical memory if applicable + * @size: size of the heap in bytes if applicable + * + * Provided by the board file. + */ +struct ion_platform_heap { + enum ion_heap_type type; + unsigned int id; + const char *name; + ion_phys_addr_t base; + size_t size; + enum ion_memory_types memory_type; +}; + +/** + * struct ion_platform_data - array of platform heaps passed from board file + * @nr: number of structures in the array + * @heaps: array of platform_heap structions + * + * Provided by the board file in the form of platform data to a platform device. 
+ */ +struct ion_platform_data { + int nr; + struct ion_platform_heap heaps[]; +}; + +#ifdef CONFIG_ION + +/** + * ion_client_create() - allocate a client and returns it + * @dev: the global ion device + * @heap_mask: mask of heaps this client can allocate from + * @name: used for debugging + */ +struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name); + +/** + * msm_ion_client_create - allocate a client using the ion_device specified in + * drivers/gpu/ion/msm/msm_ion.c + * + * heap_mask and name are the same as ion_client_create, return values + * are the same as ion_client_create. + */ + +struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name); + +/** + * ion_client_destroy() - free's a client and all it's handles + * @client: the client + * + * Free the provided client and all it's resources including + * any handles it is holding. + */ +void ion_client_destroy(struct ion_client *client); + +/** + * ion_alloc - allocate ion memory + * @client: the client + * @len: size of the allocation + * @align: requested allocation alignment, lots of hardware blocks have + * alignment requirements of some kind + * @flags: mask of heaps to allocate from, if multiple bits are set + * heaps will be tried in order from lowest to highest order bit + * + * Allocate memory in one of the heaps provided in heap mask and return + * an opaque handle to it. + */ +struct ion_handle *ion_alloc(struct ion_client *client, size_t len, + size_t align, unsigned int flags); + +/** + * ion_free - free a handle + * @client: the client + * @handle: the handle to free + * + * Free the provided handle. + */ +void ion_free(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_phys - returns the physical address and len of a handle + * @client: the client + * @handle: the handle + * @addr: a pointer to put the address in + * @len: a pointer to put the length in + * + * This function queries the heap for a particular handle to get the + * handle's physical address. It't output is only correct if + * a heap returns physically contiguous memory -- in other cases + * this api should not be implemented -- ion_map_dma should be used + * instead. Returns -EINVAL if the handle is invalid. This has + * no implications on the reference counting of the handle -- + * the returned value may not be valid if the caller is not + * holding a reference. + */ +int ion_phys(struct ion_client *client, struct ion_handle *handle, + ion_phys_addr_t *addr, size_t *len); + +/** + * ion_map_kernel - create mapping for the given handle + * @client: the client + * @handle: handle to map + * @flags: flags for this mapping + * + * Map the given handle into the kernel and return a kernel address that + * can be used to access this address. If no flags are specified, this + * will return a non-secure uncached mapping. 
+ */ +void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_kernel() - destroy a kernel mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_map_dma - create a dma mapping for a given handle + * @client: the client + * @handle: handle to map + * + * Return an sglist describing the given handle + */ +struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_dma() - destroy a dma mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_dma(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_share() - given a handle, obtain a buffer to pass to other clients + * @client: the client + * @handle: the handle to share + * + * Given a handle, return a buffer, which exists in a global name + * space, and can be passed to other clients. Should be passed into ion_import + * to obtain a new handle for this buffer. + * + * NOTE: This function does do not an extra reference. The burden is on the + * caller to make sure the buffer doesn't go away while it's being passed to + * another client. That is, ion_free should not be called on this handle until + * the buffer has been imported into the other client. + */ +struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle); + +/** + * ion_import() - given an buffer in another client, import it + * @client: this blocks client + * @buffer: the buffer to import (as obtained from ion_share) + * + * Given a buffer, add it to the client and return the handle to use to refer + * to it further. This is called to share a handle from one kernel client to + * another. + */ +struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer); + +/** + * ion_import_fd() - given an fd obtained via ION_IOC_SHARE ioctl, import it + * @client: this blocks client + * @fd: the fd + * + * A helper function for drivers that will be recieving ion buffers shared + * with them from userspace. These buffers are represented by a file + * descriptor obtained as the return from the ION_IOC_SHARE ioctl. + * This function coverts that fd into the underlying buffer, and returns + * the handle to use to refer to it further. + */ +struct ion_handle *ion_import_fd(struct ion_client *client, int fd); + +/** + * ion_handle_get_flags - get the flags for a given handle + * + * @client - client who allocated the handle + * @handle - handle to get the flags + * @flags - pointer to store the flags + * + * Gets the current flags for a handle. These flags indicate various options + * of the buffer (caching, security, etc.) 
+ */ +int ion_handle_get_flags(struct ion_client *client, struct ion_handle *handle, + unsigned long *flags); + +#else +static inline struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_client_destroy(struct ion_client *client) { } + +static inline struct ion_handle *ion_alloc(struct ion_client *client, + size_t len, size_t align, unsigned int flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_free(struct ion_client *client, + struct ion_handle *handle) { } + + +static inline int ion_phys(struct ion_client *client, + struct ion_handle *handle, ion_phys_addr_t *addr, size_t *len) +{ + return -ENODEV; +} + +static inline void *ion_map_kernel(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_kernel(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_dma(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import_fd(struct ion_client *client, + int fd) +{ + return ERR_PTR(-ENODEV); +} + +static inline int ion_handle_get_flags(struct ion_client *client, + struct ion_handle *handle, unsigned long *flags) +{ + return -ENODEV; +} +#endif /* CONFIG_ION */ +#endif /* __KERNEL__ */ + +/** + * DOC: Ion Userspace API + * + * create a client by opening /dev/ion + * most operations handled via following ioctls + * + */ + +/** + * struct ion_allocation_data - metadata passed from userspace for allocations + * @len: size of the allocation + * @align: required alignment of the allocation + * @flags: flags passed to heap + * @handle: pointer that will be populated with a cookie to use to refer + * to this allocation + * + * Provided by userspace as an argument to the ioctl + */ +struct ion_allocation_data { + size_t len; + size_t align; + unsigned int flags; + struct ion_handle *handle; +}; + +/** + * struct ion_fd_data - metadata passed to/from userspace for a handle/fd pair + * @handle: a handle + * @fd: a file descriptor representing that handle + * + * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with + * the handle returned from ion alloc, and the kernel returns the file + * descriptor to share or map in the fd field. For ION_IOC_IMPORT, userspace + * provides the file descriptor and the kernel returns the handle. 
+ */ +struct ion_fd_data { + struct ion_handle *handle; + int fd; +}; + +/** + * struct ion_handle_data - a handle passed to/from the kernel + * @handle: a handle + */ +struct ion_handle_data { + struct ion_handle *handle; +}; + +/** + * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl + * @cmd: the custom ioctl function to call + * @arg: additional data to pass to the custom ioctl, typically a user + * pointer to a predefined structure + * + * This works just like the regular cmd and arg fields of an ioctl. + */ +struct ion_custom_data { + unsigned int cmd; + unsigned long arg; +}; + + +/* struct ion_flush_data - data passed to ion for flushing caches + * + * @handle: handle with data to flush + * @vaddr: userspace virtual address mapped with mmap + * @offset: offset into the handle to flush + * @length: length of handle to flush + * + * Performs cache operations on the handle. If p is the start address + * of the handle, p + offset through p + offset + length will have + * the cache operations performed + */ +struct ion_flush_data { + struct ion_handle *handle; + void *vaddr; + unsigned int offset; + unsigned int length; +}; + +/* struct ion_flag_data - information about flags for this buffer + * + * @handle: handle to get flags from + * @flags: flags of this handle + * + * Takes handle as an input and outputs the flags from the handle + * in the flag field. + */ +struct ion_flag_data { + struct ion_handle *handle; + unsigned long flags; +}; + +#define ION_IOC_MAGIC 'I' + +/** + * DOC: ION_IOC_ALLOC - allocate memory + * + * Takes an ion_allocation_data struct and returns it with the handle field + * populated with the opaque handle for the allocation. + */ +#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \ + struct ion_allocation_data) + +/** + * DOC: ION_IOC_FREE - free memory + * + * Takes an ion_handle_data struct and frees the handle. + */ +#define ION_IOC_FREE _IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data) + +/** + * DOC: ION_IOC_MAP - get a file descriptor to mmap + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be used as an argument to mmap. + */ +#define ION_IOC_MAP _IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data) + +/** + * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be passed to another process. The corresponding opaque handle can + * be retrieved via ION_IOC_IMPORT. + */ +#define ION_IOC_SHARE _IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data) + +/** + * DOC: ION_IOC_IMPORT - imports a shared file descriptor + * + * Takes an ion_fd_data struct with the fd field populated with a valid file + * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle + * filed set to the corresponding opaque handle. 
+ */ +#define ION_IOC_IMPORT _IOWR(ION_IOC_MAGIC, 5, int) + +/** + * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl + * + * Takes the argument of the architecture specific ioctl to call and + * passes appropriate userdata for that ioctl + */ +#define ION_IOC_CUSTOM _IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data) + + +/** + * DOC: ION_IOC_CLEAN_CACHES - clean the caches + * + * Clean the caches of the handle specified. + */ +#define ION_IOC_CLEAN_CACHES _IOWR(ION_IOC_MAGIC, 7, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_INV_CACHES - invalidate the caches + * + * Invalidate the caches of the handle specified. + */ +#define ION_IOC_INV_CACHES _IOWR(ION_IOC_MAGIC, 8, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_CLEAN_CACHES - clean and invalidate the caches + * + * Clean and invalidate the caches of the handle specified. + */ +#define ION_IOC_CLEAN_INV_CACHES _IOWR(ION_IOC_MAGIC, 9, \ + struct ion_flush_data) + +/** + * DOC: ION_IOC_GET_FLAGS - get the flags of the handle + * + * Gets the flags of the current handle which indicate cachability, + * secure state etc. + */ +#define ION_IOC_GET_FLAGS _IOWR(ION_IOC_MAGIC, 10, \ + struct ion_flag_data) +#endif /* _LINUX_ION_H */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58ae8e00..aabe5a8d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h old mode 100644 new mode 100755 index 56e6cc6b..92b41a5d --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -35,13 +35,18 @@ #define _MSM_KGSL_H #define KGSL_VERSION_MAJOR 3 -#define KGSL_VERSION_MINOR 8 +#define KGSL_VERSION_MINOR 10 /*context flags */ -#define KGSL_CONTEXT_SAVE_GMEM 1 -#define KGSL_CONTEXT_NO_GMEM_ALLOC 2 -#define KGSL_CONTEXT_SUBMIT_IB_LIST 4 -#define KGSL_CONTEXT_CTX_SWITCH 8 +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 + +#define KGSL_CONTEXT_INVALID 0xffffffff /* Memory allocayion flags */ #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000 @@ -57,6 +62,25 @@ #define KGSL_FLAGS_RESERVED1 0x00000040 #define KGSL_FLAGS_RESERVED2 0x00000080 #define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Clock flags to show which clocks should be controled by a given platform */ +#define KGSL_CLK_SRC 0x00000001 +#define KGSL_CLK_CORE 0x00000002 +#define KGSL_CLK_IFACE 0x00000004 +#define KGSL_CLK_MEM 0x00000008 +#define KGSL_CLK_MEM_IFACE 0x00000010 +#define KGSL_CLK_AXI 0x00000020 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; #define KGSL_MAX_PWRLEVELS 5 @@ -74,7 +98,9 @@ enum kgsl_deviceid { enum kgsl_user_mem_type { KGSL_USER_MEM_TYPE_PMEM = 0x00000000, KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, - KGSL_USER_MEM_TYPE_ADDR = 0x00000002 + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + 
KGSL_USER_MEM_TYPE_ION = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000004, }; struct kgsl_devinfo { @@ -111,9 +137,9 @@ struct kgsl_devmemstore { unsigned int sbz5; }; -#define KGSL_DEVICE_MEMSTORE_OFFSET(field) \ - offsetof(struct kgsl_devmemstore, field) - +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) /* timestamp id*/ enum kgsl_timestamp_type { @@ -132,6 +158,7 @@ enum kgsl_property_type { KGSL_PROP_MMU_ENABLE = 0x00000006, KGSL_PROP_INTERRUPT_WAITS = 0x00000007, KGSL_PROP_VERSION = 0x00000008, + KGSL_PROP_GPU_RESET_STAT = 0x00000009 }; struct kgsl_shadowprop { @@ -246,6 +273,14 @@ struct kgsl_device_waittimestamp { #define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) /* issue indirect commands to the GPU. * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE @@ -339,6 +374,26 @@ struct kgsl_map_user_mem { #define IOCTL_KGSL_MAP_USER_MEM \ _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned int gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + /* add a block of pmem or fb into the GPU address space */ struct kgsl_sharedmem_from_pmem { int pmem_fd; @@ -482,6 +537,14 @@ struct kgsl_timestamp_event_genlock { int handle; /* Handle of the genlock lock to release */ }; +/* + * Set a property within the kernel. Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + #ifdef __KERNEL__ #ifdef CONFIG_MSM_KGSL_DRM int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, diff --git a/include/linux/msm_mdp.h b/include/linux/msm_mdp.h index a933facc..dcedc554 100644 --- a/include/linux/msm_mdp.h +++ b/include/linux/msm_mdp.h @@ -1,6 +1,7 @@ /* include/linux/msm_mdp.h * * Copyright (C) 2007 Google Incorporated + * Copyright (c) 2012 Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -15,25 +16,90 @@ #define _MSM_MDP_H_ #include +#include #define MSMFB_IOCTL_MAGIC 'm' #define MSMFB_GRP_DISP _IOW(MSMFB_IOCTL_MAGIC, 1, unsigned int) #define MSMFB_BLIT _IOW(MSMFB_IOCTL_MAGIC, 2, unsigned int) +#define MSMFB_SUSPEND_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 128, unsigned int) +#define MSMFB_RESUME_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 129, unsigned int) +#define MSMFB_CURSOR _IOW(MSMFB_IOCTL_MAGIC, 130, struct fb_cursor) +#define MSMFB_SET_LUT _IOW(MSMFB_IOCTL_MAGIC, 131, struct fb_cmap) +#define MSMFB_HISTOGRAM _IOWR(MSMFB_IOCTL_MAGIC, 132, struct mdp_histogram_data) +/* new ioctls's for set/get ccs matrix */ +#define MSMFB_GET_CCS_MATRIX _IOWR(MSMFB_IOCTL_MAGIC, 133, struct mdp_ccs) +#define MSMFB_SET_CCS_MATRIX _IOW(MSMFB_IOCTL_MAGIC, 134, struct mdp_ccs) +#define MSMFB_OVERLAY_SET _IOWR(MSMFB_IOCTL_MAGIC, 135, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_UNSET _IOW(MSMFB_IOCTL_MAGIC, 136, unsigned int) +#define MSMFB_OVERLAY_PLAY _IOW(MSMFB_IOCTL_MAGIC, 137, \ + struct msmfb_overlay_data) +#define MSMFB_GET_PAGE_PROTECTION _IOR(MSMFB_IOCTL_MAGIC, 138, \ + struct mdp_page_protection) +#define MSMFB_SET_PAGE_PROTECTION _IOW(MSMFB_IOCTL_MAGIC, 139, \ + struct mdp_page_protection) +#define MSMFB_OVERLAY_GET _IOR(MSMFB_IOCTL_MAGIC, 140, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_PLAY_ENABLE _IOW(MSMFB_IOCTL_MAGIC, 141, unsigned int) +#define MSMFB_OVERLAY_BLT _IOWR(MSMFB_IOCTL_MAGIC, 142, \ + struct msmfb_overlay_blt) +#define MSMFB_OVERLAY_BLT_OFFSET _IOW(MSMFB_IOCTL_MAGIC, 143, unsigned int) +#define MSMFB_HISTOGRAM_START _IOR(MSMFB_IOCTL_MAGIC, 144, \ + struct mdp_histogram_start_req) +#define MSMFB_HISTOGRAM_STOP _IOR(MSMFB_IOCTL_MAGIC, 145, unsigned int) +#define MSMFB_NOTIFY_UPDATE _IOW(MSMFB_IOCTL_MAGIC, 146, unsigned int) + +#define MSMFB_OVERLAY_3D _IOWR(MSMFB_IOCTL_MAGIC, 147, \ + struct msmfb_overlay_3d) + +#define MSMFB_MIXER_INFO _IOWR(MSMFB_IOCTL_MAGIC, 148, \ + struct msmfb_mixer_info_req) +#define MSMFB_OVERLAY_PLAY_WAIT _IOWR(MSMFB_IOCTL_MAGIC, 149, \ + struct msmfb_overlay_data) +#define MSMFB_WRITEBACK_INIT _IO(MSMFB_IOCTL_MAGIC, 150) +#define MSMFB_WRITEBACK_START _IO(MSMFB_IOCTL_MAGIC, 151) +#define MSMFB_WRITEBACK_STOP _IO(MSMFB_IOCTL_MAGIC, 152) +#define MSMFB_WRITEBACK_QUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 153, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_DEQUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 154, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_TERMINATE _IO(MSMFB_IOCTL_MAGIC, 155) +#define MSMFB_MDP_PP _IOWR(MSMFB_IOCTL_MAGIC, 156, struct msmfb_mdp_pp) + +#define FB_TYPE_3D_PANEL 0x10101010 +#define MDP_IMGTYPE2_START 0x10000 +#define MSMFB_DRIVER_VERSION 0xF9E8D701 enum { - MDP_RGB_565, /* RGB 565 planar */ + NOTIFY_UPDATE_START, + NOTIFY_UPDATE_STOP, +}; + +enum { + MDP_RGB_565, /* RGB 565 planer */ MDP_XRGB_8888, /* RGB 888 padded */ - MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planar w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planer w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2_ADRENO, MDP_ARGB_8888, /* ARGB 888 */ - MDP_RGB_888, /* RGB 888 planar */ - MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planar w/ Cr is in MSB */ + MDP_RGB_888, /* RGB 888 planer */ + MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planer w/ Cr is in MSB */ MDP_YCRYCB_H2V1, /* YCrYCb interleave */ - MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ + 
MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ MDP_RGBA_8888, /* ARGB 888 */ MDP_BGRA_8888, /* ABGR 888 */ MDP_RGBX_8888, /* RGBX 888 */ - MDP_IMGTYPE_LIMIT /* Non valid image type after this enum */ + MDP_Y_CRCB_H2V2_TILE, /* Y and CrCb, pseudo planer tile */ + MDP_Y_CBCR_H2V2_TILE, /* Y and CbCr, pseudo planer tile */ + MDP_Y_CR_CB_H2V2, /* Y, Cr and Cb, planar */ + MDP_Y_CR_CB_GH2V2, /* Y, Cr and Cb, planar aligned to Android YV12 */ + MDP_Y_CB_CR_H2V2, /* Y, Cb and Cr, planar */ + MDP_Y_CRCB_H1V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H1V1, /* Y and CbCr, pseduo planer w/ Cb is in MSB */ + MDP_IMGTYPE_LIMIT, + MDP_BGR_565 = MDP_IMGTYPE2_START, /* BGR 565 planer */ + MDP_FB_FORMAT, /* framebuffer format */ + MDP_IMGTYPE_LIMIT2 /* Non valid image type after this enum */ }; enum { @@ -41,24 +107,57 @@ enum { FB_IMG, }; -/* flag values */ +enum { + HSIC_HUE = 0, + HSIC_SAT, + HSIC_INT, + HSIC_CON, + NUM_HSIC_PARAM, +}; + +/* mdp_blit_req flag values */ #define MDP_ROT_NOP 0 #define MDP_FLIP_LR 0x1 #define MDP_FLIP_UD 0x2 #define MDP_ROT_90 0x4 #define MDP_ROT_180 (MDP_FLIP_UD|MDP_FLIP_LR) #define MDP_ROT_270 (MDP_ROT_90|MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_ROT_MASK 0x7 #define MDP_DITHER 0x8 #define MDP_BLUR 0x10 #define MDP_BLEND_FG_PREMULT 0x20000 +#define MDP_DEINTERLACE 0x80000000 +#define MDP_SHARPENING 0x40000000 +#define MDP_NO_DMA_BARRIER_START 0x20000000 +#define MDP_NO_DMA_BARRIER_END 0x10000000 +#define MDP_NO_BLIT 0x08000000 +#define MDP_BLIT_WITH_DMA_BARRIERS 0x000 +#define MDP_BLIT_WITH_NO_DMA_BARRIERS \ + (MDP_NO_DMA_BARRIER_START | MDP_NO_DMA_BARRIER_END) +#define MDP_BLIT_SRC_GEM 0x04000000 +#define MDP_BLIT_DST_GEM 0x02000000 +#define MDP_BLIT_NON_CACHED 0x01000000 +#define MDP_OV_PIPE_SHARE 0x00800000 +#define MDP_DEINTERLACE_ODD 0x00400000 +#define MDP_OV_PLAY_NOWAIT 0x00200000 +#define MDP_SOURCE_ROTATED_90 0x00100000 +#define MDP_DPP_HSIC 0x00080000 +#define MDP_BACKEND_COMPOSITION 0x00040000 +#define MDP_BORDERFILL_SUPPORTED 0x00010000 +#define MDP_SECURE_OVERLAY_SESSION 0x00008000 +#define MDP_MEMORY_ID_TYPE_FB 0x00001000 #define MDP_TRANSP_NOP 0xffffffff #define MDP_ALPHA_NOP 0xff -/* drewis: added for android 4.0 */ -#define MDP_BLIT_NON_CACHED 0x01000000 -/* drewis: end */ +#define MDP_FB_PAGE_PROTECTION_NONCACHED (0) +#define MDP_FB_PAGE_PROTECTION_WRITECOMBINE (1) +#define MDP_FB_PAGE_PROTECTION_WRITETHROUGHCACHE (2) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKCACHE (3) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKWACACHE (4) +/* Sentinel: Don't use! */ +#define MDP_FB_PAGE_PROTECTION_INVALID (5) +/* Count of the number of MDP_FB_PAGE_PROTECTION_... values. 
*/ +#define MDP_NUM_FB_PAGE_PROTECTION_VALUES (5) struct mdp_rect { uint32_t x; @@ -73,8 +172,41 @@ struct mdp_img { uint32_t format; uint32_t offset; int memory_id; /* the file descriptor */ + uint32_t priv; }; +/* + * {3x3} + {3} ccs matrix + */ + +#define MDP_CCS_RGB2YUV 0 +#define MDP_CCS_YUV2RGB 1 + +#define MDP_CCS_SIZE 9 +#define MDP_BV_SIZE 3 + +struct mdp_ccs { + int direction; /* MDP_CCS_RGB2YUV or YUV2RGB */ + uint16_t ccs[MDP_CCS_SIZE]; /* 3x3 color coefficients */ + uint16_t bv[MDP_BV_SIZE]; /* 1x3 bias vector */ +}; + +struct mdp_csc { + int id; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +/* The version of the mdp_blit_req structure so that + * user applications can selectively decide which functionality + * to include + */ + +#define MDP_BLIT_REQ_VERSION 2 + struct mdp_blit_req { struct mdp_img src; struct mdp_img dst; @@ -83,6 +215,7 @@ struct mdp_blit_req { uint32_t alpha; uint32_t transp_mask; uint32_t flags; + int sharpening_strength; /* -127 <--> 127, default 64 */ }; struct mdp_blit_req_list { @@ -90,4 +223,289 @@ struct mdp_blit_req_list { struct mdp_blit_req req[]; }; +#define MSMFB_DATA_VERSION 2 + +struct msmfb_data { + uint32_t offset; + int memory_id; + int id; + uint32_t flags; + uint32_t priv; + uint32_t iova; +}; + +#define MSMFB_NEW_REQUEST -1 + +struct msmfb_overlay_data { + uint32_t id; + struct msmfb_data data; + uint32_t version_key; + struct msmfb_data plane1_data; + struct msmfb_data plane2_data; +}; + +struct msmfb_img { + uint32_t width; + uint32_t height; + uint32_t format; +}; + +#define MSMFB_WRITEBACK_DEQUEUE_BLOCKING 0x1 +struct msmfb_writeback_data { + struct msmfb_data buf_info; + struct msmfb_img img; +}; + +struct dpp_ctrl { + /* + *'sharp_strength' has inputs = -128 <-> 127 + * Increasingly positive values correlate with increasingly sharper + * picture. Increasingly negative values correlate with increasingly + * smoothed picture. + */ + int8_t sharp_strength; + int8_t hsic_params[NUM_HSIC_PARAM]; +}; + +struct mdp_overlay { + struct msmfb_img src; + struct mdp_rect src_rect; + struct mdp_rect dst_rect; + uint32_t z_order; /* stage number */ + uint32_t is_fg; /* control alpha & transp */ + uint32_t alpha; + uint32_t transp_mask; + uint32_t flags; + uint32_t id; + uint32_t user_data[8]; + struct dpp_ctrl dpp; +}; + +struct msmfb_overlay_3d { + uint32_t is_3d; + uint32_t width; + uint32_t height; +}; + + +struct msmfb_overlay_blt { + uint32_t enable; + uint32_t offset; + uint32_t width; + uint32_t height; + uint32_t bpp; +}; + +struct mdp_histogram { + uint32_t frame_cnt; + uint32_t bin_cnt; + uint32_t *r; + uint32_t *g; + uint32_t *b; +}; + + +/* + + mdp_block_type defines the identifiers for each of pipes in MDP 4.3 + + MDP_BLOCK_RESERVED is provided for backward compatibility and is + deprecated. It corresponds to DMA_P. So MDP_BLOCK_DMA_P should be used + instead. 
+ +*/ + +enum { + MDP_BLOCK_RESERVED = 0, + MDP_BLOCK_OVERLAY_0, + MDP_BLOCK_OVERLAY_1, + MDP_BLOCK_VG_1, + MDP_BLOCK_VG_2, + MDP_BLOCK_RGB_1, + MDP_BLOCK_RGB_2, + MDP_BLOCK_DMA_P, + MDP_BLOCK_DMA_S, + MDP_BLOCK_DMA_E, + MDP_BLOCK_MAX, +}; + +/* +mdp_histogram_start_req is used to provide the parameters for +histogram start request +*/ + +struct mdp_histogram_start_req { + uint32_t block; + uint8_t frame_cnt; + uint8_t bit_mask; + uint8_t num_bins; +}; + + +/* + + mdp_histogram_data is used to return the histogram data, once + the histogram is done/stopped/cance + + */ + + +struct mdp_histogram_data { + uint32_t block; + uint8_t bin_cnt; + uint32_t *c0; + uint32_t *c1; + uint32_t *c2; + uint32_t *extra_info; +}; + +struct mdp_pcc_coeff { + uint32_t c, r, g, b, rr, gg, bb, rg, gb, rb, rgb_0, rgb_1; +}; + +struct mdp_pcc_cfg_data { + uint32_t block; + uint32_t ops; + struct mdp_pcc_coeff r, g, b; +}; + +#define MDP_CSC_FLAG_ENABLE 0x1 +#define MDP_CSC_FLAG_YUV_IN 0x2 +#define MDP_CSC_FLAG_YUV_OUT 0x4 + +struct mdp_csc_cfg { + /* flags for enable CSC, toggling RGB,YUV input/output */ + uint32_t flags; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +struct mdp_csc_cfg_data { + uint32_t block; + struct mdp_csc_cfg csc_data; +}; + +enum { + mdp_lut_igc, + mdp_lut_pgc, + mdp_lut_hist, + mdp_lut_max, +}; + + +struct mdp_igc_lut_data { + uint32_t block; + uint32_t len, ops; + uint32_t *c0_c1_data; + uint32_t *c2_data; +}; + +struct mdp_ar_gc_lut_data { + uint32_t x_start; + uint32_t slope; + uint32_t offset; +}; + +struct mdp_pgc_lut_data { + uint32_t block; + uint32_t flags; + uint8_t num_r_stages; + uint8_t num_g_stages; + uint8_t num_b_stages; + struct mdp_ar_gc_lut_data *r_data; + struct mdp_ar_gc_lut_data *g_data; + struct mdp_ar_gc_lut_data *b_data; +}; + + +struct mdp_hist_lut_data { + uint32_t block; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +struct mdp_lut_cfg_data { + uint32_t lut_type; + union { + struct mdp_igc_lut_data igc_lut_data; + struct mdp_pgc_lut_data pgc_lut_data; + struct mdp_hist_lut_data hist_lut_data; + } data; +}; + +struct mdp_qseed_cfg_data { + uint32_t block; + uint32_t table_num; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +enum { + mdp_op_pcc_cfg, + mdp_op_csc_cfg, + mdp_op_lut_cfg, + mdp_op_qseed_cfg, + mdp_op_max, +}; + +struct msmfb_mdp_pp { + uint32_t op; + union { + struct mdp_pcc_cfg_data pcc_cfg_data; + struct mdp_csc_cfg_data csc_cfg_data; + struct mdp_lut_cfg_data lut_cfg_data; + struct mdp_qseed_cfg_data qseed_cfg_data; + } data; +}; + + +struct mdp_page_protection { + uint32_t page_protection; +}; + + +struct mdp_mixer_info { + int pndx; + int pnum; + int ptype; + int mixer_num; + int z_order; +}; + +#define MAX_PIPE_PER_MIXER 4 + +struct msmfb_mixer_info_req { + int mixer_num; + int cnt; + struct mdp_mixer_info info[MAX_PIPE_PER_MIXER]; +}; + +enum { + DISPLAY_SUBSYSTEM_ID, + ROTATOR_SUBSYSTEM_ID, +}; + +#ifdef __KERNEL__ + +/* get the framebuffer physical address information */ +int get_fb_phys_info(unsigned long *start, unsigned long *len, int fb_num, + int subsys_id); +struct fb_info *msm_fb_get_writeback_fb(void); +int msm_fb_writeback_init(struct fb_info *info); +int msm_fb_writeback_start(struct fb_info *info); +int msm_fb_writeback_queue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_dequeue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_stop(struct fb_info *info); +int 
msm_fb_writeback_terminate(struct fb_info *info); +#endif + #endif /* _MSM_MDP_H_ */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2ac98852..748f853b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,7 +26,7 @@ #define __HCI_CORE_H #include - +#include /* HCI upper protocols */ #define HCI_PROTO_L2CAP 0 #define HCI_PROTO_SCO 1 @@ -183,10 +183,11 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; + struct timer_list auto_accept_timer; struct work_struct work_add; struct work_struct work_del; - + struct wake_lock idle_lock; struct device dev; atomic_t devref; @@ -246,6 +247,7 @@ enum { HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, + HCI_CONN_SCO_SETUP_PEND, }; static inline void hci_conn_hash_init(struct hci_dev *hdev) @@ -326,6 +328,7 @@ void hci_acl_connect(struct hci_conn *conn); void hci_acl_disconn(struct hci_conn *conn, __u8 reason); void hci_add_sco(struct hci_conn *conn, __u16 handle); void hci_setup_sync(struct hci_conn *conn, __u16 handle); +void hci_sco_setup(struct hci_conn *conn, __u8 status); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, __u16 pkt_type, bdaddr_t *dst); diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d940835..152922a4 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -10,7 +10,7 @@ SCHED_FEAT(FAIR_SLEEPERS, 1) * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * By not normalizing the sleep time, heavy tasks get an effective diff --git a/mm/ashmem.c b/mm/ashmem.c old mode 100644 new mode 100755 index 5e059283..365fed2e --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #define ASHMEM_NAME_PREFIX "" #define ASHMEM_NAME_PREFIX_LEN 0 @@ -45,6 +46,8 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ + unsigned long vm_start; /* Start address of vm_area + * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -178,7 +181,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = nonseekable_open(inode, file); + ret = generic_file_open(inode, file); if (unlikely(ret)) return ret; @@ -210,6 +213,67 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } +static ssize_t ashmem_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct ashmem_area *asma = file->private_data; + int ret = 0; + + mutex_lock(&ashmem_mutex); + + /* If size is not set, or set to 0, always return EOF. 
*/ + if (asma->size == 0) { + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->read(asma->file, buf, len, pos); + if (ret < 0) { + goto out; + } + + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) +{ + struct ashmem_area *asma = file->private_data; + int ret; + + mutex_lock(&ashmem_mutex); + + if (asma->size == 0) { + ret = -EINVAL; + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->llseek(asma->file, offset, origin); + if (ret < 0) { + goto out; + } + + /** Copy f_pos from backing file, since f_ops->llseek() sets it */ + file->f_pos = asma->file->f_pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + static inline unsigned long calc_vm_may_flags(unsigned long prot) { @@ -264,6 +328,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; + asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -564,6 +629,69 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } +#ifdef CONFIG_OUTER_CACHE +static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) +{ + unsigned int physaddr = 0; + pgd_t *pgd_ptr = NULL; + pmd_t *pmd_ptr = NULL; + pte_t *pte_ptr = NULL, pte; + + spin_lock(¤t->mm->page_table_lock); + pgd_ptr = pgd_offset(current->mm, virtaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + pr_err("Failed to convert virtaddr %x to pgd_ptr\n", + virtaddr); + goto done; + } + + pmd_ptr = pmd_offset(pgd_ptr, virtaddr); + if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { + pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", + (void *)pgd_ptr); + goto done; + } + + pte_ptr = pte_offset_map(pmd_ptr, virtaddr); + if (!pte_ptr) { + pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", + (void *)pmd_ptr); + goto done; + } + pte = *pte_ptr; + physaddr = pte_pfn(pte); + pte_unmap(pte_ptr); +done: + spin_unlock(¤t->mm->page_table_lock); + physaddr <<= PAGE_SHIFT; + return physaddr; +} +#endif + +static int ashmem_cache_op(struct ashmem_area *asma, + void (*cache_func)(unsigned long vstart, unsigned long length, + unsigned long pstart)) +{ +#ifdef CONFIG_OUTER_CACHE + unsigned long vaddr; +#endif + mutex_lock(&ashmem_mutex); +#ifndef CONFIG_OUTER_CACHE + cache_func(asma->vm_start, asma->size, 0); +#else + for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; + vaddr += PAGE_SIZE) { + unsigned long physaddr; + physaddr = virtaddr_to_physaddr(vaddr); + if (!physaddr) + return -EINVAL; + cache_func(vaddr, PAGE_SIZE, physaddr); + } +#endif + mutex_unlock(&ashmem_mutex); + return 0; +} + static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -604,6 +732,15 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink(ret, GFP_KERNEL); } break; + case ASHMEM_CACHE_FLUSH_RANGE: + ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); + break; + case ASHMEM_CACHE_CLEAN_RANGE: + ret = ashmem_cache_op(asma, &clean_caches); + break; + case ASHMEM_CACHE_INV_RANGE: + ret = ashmem_cache_op(asma, &invalidate_caches); + break; } return ret; @@ -666,6 +803,8 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, + .read = 
ashmem_read, + .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, diff --git a/mm/ksm.c b/mm/ksm.c old mode 100644 new mode 100755 index e9501f83..d8ce84ad --- a/mm/ksm.c +++ b/mm/ksm.c @@ -163,9 +163,6 @@ static unsigned long ksm_pages_unshared; /* The number of rmap_items in use: to calculate pages_volatile */ static unsigned long ksm_rmap_items; -/* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages; - /* Number of pages ksmd should scan in one batch */ static unsigned int ksm_thread_pages_to_scan = 100; @@ -317,7 +314,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, @@ -391,7 +388,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) goto out; page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) goto out; if (PageAnon(page)) { flush_anon_page(vma, page, addr); @@ -628,7 +625,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, if (!ptep) goto out; - if (pte_write(*ptep)) { + if (pte_write(*ptep) || pte_dirty(*ptep)) { pte_t entry; swapped = PageSwapCache(page); @@ -648,10 +645,12 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * page */ if ((page_mapcount(page) + 2 + swapped) != page_count(page)) { - set_pte_at_notify(mm, addr, ptep, entry); + set_pte_at(mm, addr, ptep, entry); goto out_unlock; } - entry = pte_wrprotect(entry); + if (pte_dirty(entry)) + set_page_dirty(page); + entry = pte_mkclean(pte_wrprotect(entry)); set_pte_at_notify(mm, addr, ptep, entry); } *orig_pte = *ptep; @@ -717,6 +716,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *oldpage, set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot)); page_remove_rmap(oldpage); + if (!page_mapped(oldpage)) + try_to_free_swap(oldpage); put_page(oldpage); pte_unmap_unlock(ptep, ptl); @@ -827,13 +828,6 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1, struct page *kpage; int err = -EFAULT; - /* - * The number of nodes in the stable tree - * is the number of kernel pages that we hold. - */ - if (ksm_max_kernel_pages && - ksm_max_kernel_pages <= ksm_pages_shared) - return err; kpage = alloc_page(GFP_HIGHUSER); if (!kpage) @@ -1209,6 +1203,18 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) slot = ksm_scan.mm_slot; if (slot == &ksm_mm_head) { + /* + * A number of pages can hang around indefinitely on per-cpu + * pagevecs, raised page count preventing write_protect_page + * from merging them. Though it doesn't really matter much, + * it is puzzling to see some stuck in pages_volatile until + * other activity jostles them out, and they also prevented + * LTP's KSM test from succeeding deterministically; so drain + * them here (here rather than on entry to ksm_do_scan(), + * so we don't IPI too often when pages_to_scan is set low). 
+ */ + lru_add_drain_all(); + root_unstable_tree = RB_ROOT; spin_lock(&ksm_mmlist_lock); @@ -1314,7 +1320,7 @@ next_mm: static void ksm_do_scan(unsigned int scan_npages) { struct rmap_item *rmap_item; - struct page *page; + struct page *uninitialized_var(page); while (scan_npages--) { cond_resched(); @@ -1577,29 +1583,6 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, } KSM_ATTR(run); -static ssize_t max_kernel_pages_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err; - unsigned long nr_pages; - - err = strict_strtoul(buf, 10, &nr_pages); - if (err) - return -EINVAL; - - ksm_max_kernel_pages = nr_pages; - - return count; -} - -static ssize_t max_kernel_pages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", ksm_max_kernel_pages); -} -KSM_ATTR(max_kernel_pages); - static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1649,7 +1632,6 @@ static struct attribute *ksm_attrs[] = { &sleep_millisecs_attr.attr, &pages_to_scan_attr.attr, &run_attr.attr, - &max_kernel_pages_attr.attr, &pages_shared_attr.attr, &pages_sharing_attr.attr, &pages_unshared_attr.attr, @@ -1669,8 +1651,6 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; - ksm_max_kernel_pages = totalram_pages / 4; - err = ksm_slab_init(); if (err) goto out; diff --git a/mm/vmalloc.c b/mm/vmalloc.c old mode 100644 new mode 100755 index c2287313..b689e2ae --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1470,6 +1470,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, { struct page **pages; unsigned int nr_pages, array_size, i; + gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1477,13 +1478,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. 
*/ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO, + pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, PAGE_KERNEL, node, caller); area->flags |= VM_VPAGES; } else { - pages = kmalloc_node(array_size, - (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO, - node); + pages = kmalloc_node(array_size, nested_gfp, node); } area->pages = pages; area->caller = caller; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2f4d30fd..c22bc17c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +/* Device _must_ be locked */ +void hci_sco_setup(struct hci_conn *conn, __u8 status) +{ + struct hci_conn *sco = conn->link; + + BT_DBG("%p", conn); + + if (!sco) + return; + + if (!status) { + if (lmp_esco_capable(conn->hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } +} + static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -216,6 +237,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; + wake_lock_init(&conn->idle_lock, WAKE_LOCK_SUSPEND, "bt_idle"); switch (type) { case ACL_LINK: @@ -271,9 +293,11 @@ int hci_conn_del(struct hci_conn *conn) BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); + /* Make sure no timers are running */ del_timer(&conn->idle_timer); - + wake_lock_destroy(&conn->idle_lock); del_timer(&conn->disc_timer); + del_timer(&conn->auto_accept_timer); if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; @@ -521,9 +545,11 @@ void hci_conn_enter_active_mode(struct hci_conn *conn) } timer: - if (hdev->idle_timeout > 0) + if (hdev->idle_timeout > 0) { mod_timer(&conn->idle_timer, jiffies + msecs_to_jiffies(hdev->idle_timeout)); + wake_lock(&conn->idle_lock); + } } /* Enter sniff mode */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8c59afcc..6b0b59c4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1482,6 +1482,12 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb else conn->power_save = 0; } + if (conn->mode == HCI_CM_SNIFF) + if (wake_lock_active(&conn->idle_lock)) + wake_unlock(&conn->idle_lock); + + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, ev->status); } hci_dev_unlock(hdev); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4fa12857..10640fdd 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,6 +22,7 @@ #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_TPROXY_HAVE_IPV6 1 #include @@ -29,6 +30,7 @@ #include #include #endif +*/ #include #include diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9b38fd15..0f10dfc6 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,11 +22,13 @@ #include #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_SOCKET_HAVE_IPV6 1 #include #include #endif +*/ #include