From 9bb78d2a7fd55bc5e087dd7da0bfe354b86c3dbf Mon Sep 17 00:00:00 2001
From: Shantanu Gupta
Date: Mon, 14 May 2012 02:47:02 +0530
Subject: [PATCH] [KGSL] add missing files for last commit

---
 arch/arm/include/asm/asm-offsets.h            |    1 +
 arch/arm/include/asm/outercache.h             |   75 +
 .../include/mach/internal_power_rail.h        |   63 +
 arch/arm/mach-msm/include/mach/msm_memtypes.h |   64 +
 drivers/gpu/msm/a2xx_reg.h                    |  418 +++++
 drivers/gpu/msm/adreno_a2xx.c                 | 1607 +++++++++++++++++
 drivers/gpu/msm/kgsl_gpummu.c                 |  766 ++++++++
 drivers/gpu/msm/kgsl_gpummu.h                 |   85 +
 drivers/gpu/msm/kgsl_iommu.c                  |  333 ++++
 drivers/gpu/msm/kgsl_pwrscale_idlestats.c     |  221 +++
 drivers/gpu/msm/kgsl_pwrscale_trustzone.c     |  197 ++
 include/drm/kgsl_drm.h                        |  221 +++
 12 files changed, 4051 insertions(+)
 create mode 100644 arch/arm/include/asm/asm-offsets.h
 create mode 100644 arch/arm/include/asm/outercache.h
 create mode 100644 arch/arm/mach-msm/include/mach/internal_power_rail.h
 create mode 100644 arch/arm/mach-msm/include/mach/msm_memtypes.h
 create mode 100644 drivers/gpu/msm/a2xx_reg.h
 create mode 100644 drivers/gpu/msm/adreno_a2xx.c
 create mode 100644 drivers/gpu/msm/kgsl_gpummu.c
 create mode 100644 drivers/gpu/msm/kgsl_gpummu.h
 create mode 100644 drivers/gpu/msm/kgsl_iommu.c
 create mode 100644 drivers/gpu/msm/kgsl_pwrscale_idlestats.c
 create mode 100644 drivers/gpu/msm/kgsl_pwrscale_trustzone.c
 create mode 100644 include/drm/kgsl_drm.h

diff --git a/arch/arm/include/asm/asm-offsets.h b/arch/arm/include/asm/asm-offsets.h
new file mode 100644
index 00000000..d370ee36
--- /dev/null
+++ b/arch/arm/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
new file mode 100644
index 00000000..25f76bae
--- /dev/null
+++ b/arch/arm/include/asm/outercache.h
@@ -0,0 +1,75 @@
+/*
+ * arch/arm/include/asm/outercache.h
+ *
+ * Copyright (C) 2010 ARM Ltd.
+ * Written by Catalin Marinas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_OUTERCACHE_H
+#define __ASM_OUTERCACHE_H
+
+struct outer_cache_fns {
+	void (*inv_range)(unsigned long, unsigned long);
+	void (*clean_range)(unsigned long, unsigned long);
+	void (*flush_range)(unsigned long, unsigned long);
+#ifdef CONFIG_OUTER_CACHE_SYNC
+	void (*sync)(void);
+#endif
+};
+
+#ifdef CONFIG_OUTER_CACHE
+
+extern struct outer_cache_fns outer_cache;
+
+static inline void outer_inv_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.inv_range)
+		outer_cache.inv_range(start, end);
+}
+static inline void outer_clean_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.clean_range)
+		outer_cache.clean_range(start, end);
+}
+static inline void outer_flush_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.flush_range)
+		outer_cache.flush_range(start, end);
+}
+
+#else
+
+static inline void outer_inv_range(unsigned long start, unsigned long end)
+{ }
+static inline void outer_clean_range(unsigned long start, unsigned long end)
+{ }
+static inline void outer_flush_range(unsigned long start, unsigned long end)
+{ }
+
+#endif
+
+#ifdef CONFIG_OUTER_CACHE_SYNC
+static inline void outer_sync(void)
+{
+	if (outer_cache.sync)
+		outer_cache.sync();
+}
+#else
+static inline void outer_sync(void)
+{ }
+#endif
+
+#endif /* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/mach-msm/include/mach/internal_power_rail.h b/arch/arm/mach-msm/include/mach/internal_power_rail.h
new file mode 100644
index 00000000..f489dc57
--- /dev/null
+++ b/arch/arm/mach-msm/include/mach/internal_power_rail.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2009, Code Aurora Forum. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *     * Neither the name of Code Aurora Forum, Inc. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef _INTERNAL_POWER_RAIL_H +#define _INTERNAL_POWER_RAIL_H + +/* Clock power rail IDs */ +#define PWR_RAIL_GRP_CLK 8 +#define PWR_RAIL_GRP_2D_CLK 58 +#define PWR_RAIL_MDP_CLK 14 +#define PWR_RAIL_MFC_CLK 68 +#define PWR_RAIL_ROTATOR_CLK 90 +#define PWR_RAIL_VDC_CLK 39 +#define PWR_RAIL_VFE_CLK 41 +#define PWR_RAIL_VPE_CLK 76 + +enum rail_ctl_mode { + PWR_RAIL_CTL_AUTO = 0, + PWR_RAIL_CTL_MANUAL, +}; + +static inline int __maybe_unused internal_pwr_rail_ctl(unsigned rail_id, + bool enable) +{ + /* Not yet implemented. */ + return 0; +} +static inline int __maybe_unused internal_pwr_rail_mode(unsigned rail_id, + enum rail_ctl_mode mode) +{ + /* Not yet implemented. */ + return 0; +} + +int internal_pwr_rail_ctl_auto(unsigned rail_id, bool enable); + +#endif /* _INTERNAL_POWER_RAIL_H */ + diff --git a/arch/arm/mach-msm/include/mach/msm_memtypes.h b/arch/arm/mach-msm/include/mach/msm_memtypes.h new file mode 100644 index 00000000..963f25c1 --- /dev/null +++ b/arch/arm/mach-msm/include/mach/msm_memtypes.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +/* The MSM Hardware supports multiple flavors of physical memory. + * This file captures hardware specific information of these types. +*/ + +#ifndef __ASM_ARCH_MSM_MEMTYPES_H +#define __ASM_ARCH_MSM_MEMTYPES_H + +#include +#include +int __init meminfo_init(unsigned int, unsigned int); +/* Redundant check to prevent this from being included outside of 7x30 */ +#if defined(CONFIG_ARCH_MSM7X30) +unsigned int get_num_populated_chipselects(void); +#endif + +unsigned int get_num_memory_banks(void); +unsigned int get_memory_bank_size(unsigned int); +unsigned int get_memory_bank_start(unsigned int); +int soc_change_memory_power(u64, u64, int); + +enum { + MEMTYPE_NONE = -1, + MEMTYPE_SMI_KERNEL = 0, + MEMTYPE_SMI, + MEMTYPE_EBI0, + MEMTYPE_EBI1, + MEMTYPE_MAX, +}; + +void msm_reserve(void); + +#define MEMTYPE_FLAGS_FIXED 0x1 +#define MEMTYPE_FLAGS_1M_ALIGN 0x2 + +struct memtype_reserve { + unsigned long start; + unsigned long size; + unsigned long limit; + int flags; +}; + +struct reserve_info { + struct memtype_reserve *memtype_reserve_table; + void (*calculate_reserve_sizes)(void); + int (*paddr_to_memtype)(unsigned int); + unsigned long low_unstable_address; + unsigned long max_unstable_size; + unsigned long bank_size; +}; + +extern struct reserve_info *reserve_info; +#endif diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h new file mode 100644 index 00000000..d859d61c --- /dev/null +++ b/drivers/gpu/msm/a2xx_reg.h @@ -0,0 +1,418 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ +#ifndef __A200_REG_H +#define __A200_REG_H + +enum VGT_EVENT_TYPE { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, +}; + +enum COLORFORMATX { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 = 13, + COLORX_8_8_8 = 14, +}; + +enum SURFACEFORMAT { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_10_11_11 = 16, + FMT_11_11_10 = 17, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_24_8 = 22, + FMT_24_8_FLOAT = 23, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_32_AS_8 = 39, + FMT_32_AS_8_8 = 40, + FMT_16_MPEG = 41, + FMT_16_16_MPEG = 42, + FMT_8_INTERLACED = 43, + FMT_32_AS_8_INTERLACED = 44, + FMT_32_AS_8_8_INTERLACED = 45, + FMT_16_INTERLACED = 46, + FMT_16_MPEG_INTERLACED = 47, + FMT_16_16_MPEG_INTERLACED = 48, + FMT_DXN = 49, + FMT_8_8_8_8_AS_16_16_16_16 = 50, + FMT_DXT1_AS_16_16_16_16 = 51, + FMT_DXT2_3_AS_16_16_16_16 = 52, + FMT_DXT4_5_AS_16_16_16_16 = 53, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_11_11_AS_16_16_16_16 = 55, + FMT_11_11_10_AS_16_16_16_16 = 56, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, + FMT_DXT3A_AS_1_1_1_1 = 61 +}; + +#define REG_PERF_MODE_CNT 0x0 +#define REG_PERF_STATE_RESET 0x0 +#define REG_PERF_STATE_ENABLE 0x1 +#define REG_PERF_STATE_FREEZE 0x2 + +#define RB_EDRAM_INFO_EDRAM_SIZE_SIZE 4 +#define RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE 2 +#define RB_EDRAM_INFO_UNUSED0_SIZE 8 +#define RB_EDRAM_INFO_EDRAM_RANGE_SIZE 18 + +struct rb_edram_info_t { + unsigned int edram_size:RB_EDRAM_INFO_EDRAM_SIZE_SIZE; + unsigned int edram_mapping_mode:RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE; + unsigned int unused0:RB_EDRAM_INFO_UNUSED0_SIZE; + unsigned int edram_range:RB_EDRAM_INFO_EDRAM_RANGE_SIZE; +}; + +union reg_rb_edram_info { + unsigned int val; + struct rb_edram_info_t f; +}; + +#define RBBM_READ_ERROR_UNUSED0_SIZE 2 +#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 +#define RBBM_READ_ERROR_UNUSED1_SIZE 13 +#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 +#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 + +struct rbbm_read_error_t { + unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; + unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; + unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; + unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; + unsigned int 
read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; +}; + +union rbbm_read_error_u { + unsigned int val:32; + struct rbbm_read_error_t f; +}; + +#define CP_RB_CNTL_RB_BUFSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED0_SIZE 2 +#define CP_RB_CNTL_RB_BLKSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED1_SIZE 2 +#define CP_RB_CNTL_BUF_SWAP_SIZE 2 +#define CP_RB_CNTL_UNUSED2_SIZE 2 +#define CP_RB_CNTL_RB_POLL_EN_SIZE 1 +#define CP_RB_CNTL_UNUSED3_SIZE 6 +#define CP_RB_CNTL_RB_NO_UPDATE_SIZE 1 +#define CP_RB_CNTL_UNUSED4_SIZE 3 +#define CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE 1 + +struct cp_rb_cntl_t { + unsigned int rb_bufsz:CP_RB_CNTL_RB_BUFSZ_SIZE; + unsigned int unused0:CP_RB_CNTL_UNUSED0_SIZE; + unsigned int rb_blksz:CP_RB_CNTL_RB_BLKSZ_SIZE; + unsigned int unused1:CP_RB_CNTL_UNUSED1_SIZE; + unsigned int buf_swap:CP_RB_CNTL_BUF_SWAP_SIZE; + unsigned int unused2:CP_RB_CNTL_UNUSED2_SIZE; + unsigned int rb_poll_en:CP_RB_CNTL_RB_POLL_EN_SIZE; + unsigned int unused3:CP_RB_CNTL_UNUSED3_SIZE; + unsigned int rb_no_update:CP_RB_CNTL_RB_NO_UPDATE_SIZE; + unsigned int unused4:CP_RB_CNTL_UNUSED4_SIZE; + unsigned int rb_rptr_wr_ena:CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE; +}; + +union reg_cp_rb_cntl { + unsigned int val:32; + struct cp_rb_cntl_t f; +}; + +#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL +#define RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT 0x00000004 + + +#define SQ_INT_CNTL__PS_WATCHDOG_MASK 0x00000001L +#define SQ_INT_CNTL__VS_WATCHDOG_MASK 0x00000002L + +#define RBBM_INT_CNTL__RDERR_INT_MASK 0x00000001L +#define RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK 0x00000002L +#define RBBM_INT_CNTL__GUI_IDLE_INT_MASK 0x00080000L + +#define RBBM_STATUS__CMDFIFO_AVAIL_MASK 0x0000001fL +#define RBBM_STATUS__TC_BUSY_MASK 0x00000020L +#define RBBM_STATUS__HIRQ_PENDING_MASK 0x00000100L +#define RBBM_STATUS__CPRQ_PENDING_MASK 0x00000200L +#define RBBM_STATUS__CFRQ_PENDING_MASK 0x00000400L +#define RBBM_STATUS__PFRQ_PENDING_MASK 0x00000800L +#define RBBM_STATUS__VGT_BUSY_NO_DMA_MASK 0x00001000L +#define RBBM_STATUS__RBBM_WU_BUSY_MASK 0x00004000L +#define RBBM_STATUS__CP_NRT_BUSY_MASK 0x00010000L +#define RBBM_STATUS__MH_BUSY_MASK 0x00040000L +#define RBBM_STATUS__MH_COHERENCY_BUSY_MASK 0x00080000L +#define RBBM_STATUS__SX_BUSY_MASK 0x00200000L +#define RBBM_STATUS__TPC_BUSY_MASK 0x00400000L +#define RBBM_STATUS__SC_CNTX_BUSY_MASK 0x01000000L +#define RBBM_STATUS__PA_BUSY_MASK 0x02000000L +#define RBBM_STATUS__VGT_BUSY_MASK 0x04000000L +#define RBBM_STATUS__SQ_CNTX17_BUSY_MASK 0x08000000L +#define RBBM_STATUS__SQ_CNTX0_BUSY_MASK 0x10000000L +#define RBBM_STATUS__RB_CNTX_BUSY_MASK 0x40000000L +#define RBBM_STATUS__GUI_ACTIVE_MASK 0x80000000L + +#define CP_INT_CNTL__SW_INT_MASK 0x00080000L +#define CP_INT_CNTL__T0_PACKET_IN_IB_MASK 0x00800000L +#define CP_INT_CNTL__OPCODE_ERROR_MASK 0x01000000L +#define CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK 0x02000000L +#define CP_INT_CNTL__RESERVED_BIT_ERROR_MASK 0x04000000L +#define CP_INT_CNTL__IB_ERROR_MASK 0x08000000L +#define CP_INT_CNTL__IB2_INT_MASK 0x20000000L +#define CP_INT_CNTL__IB1_INT_MASK 0x40000000L +#define CP_INT_CNTL__RB_INT_MASK 0x80000000L + +#define MASTER_INT_SIGNAL__MH_INT_STAT 0x00000020L +#define MASTER_INT_SIGNAL__SQ_INT_STAT 0x04000000L +#define MASTER_INT_SIGNAL__CP_INT_STAT 0x40000000L +#define MASTER_INT_SIGNAL__RBBM_INT_STAT 0x80000000L + +#define RB_EDRAM_INFO__EDRAM_SIZE_MASK 0x0000000fL +#define RB_EDRAM_INFO__EDRAM_RANGE_MASK 0xffffc000L + +#define MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT 0x00000006 +#define MH_ARBITER_CONFIG__L1_ARB_ENABLE__SHIFT 0x00000007 +#define 
MH_ARBITER_CONFIG__L1_ARB_HOLD_ENABLE__SHIFT 0x00000008 +#define MH_ARBITER_CONFIG__L2_ARB_CONTROL__SHIFT 0x00000009 +#define MH_ARBITER_CONFIG__PAGE_SIZE__SHIFT 0x0000000a +#define MH_ARBITER_CONFIG__TC_REORDER_ENABLE__SHIFT 0x0000000d +#define MH_ARBITER_CONFIG__TC_ARB_HOLD_ENABLE__SHIFT 0x0000000e +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT_ENABLE__SHIFT 0x0000000f +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT__SHIFT 0x00000010 +#define MH_ARBITER_CONFIG__CP_CLNT_ENABLE__SHIFT 0x00000016 +#define MH_ARBITER_CONFIG__VGT_CLNT_ENABLE__SHIFT 0x00000017 +#define MH_ARBITER_CONFIG__TC_CLNT_ENABLE__SHIFT 0x00000018 +#define MH_ARBITER_CONFIG__RB_CLNT_ENABLE__SHIFT 0x00000019 +#define MH_ARBITER_CONFIG__PA_CLNT_ENABLE__SHIFT 0x0000001a + +#define CP_RB_CNTL__RB_BUFSZ__SHIFT 0x00000000 +#define CP_RB_CNTL__RB_BLKSZ__SHIFT 0x00000008 +#define CP_RB_CNTL__RB_POLL_EN__SHIFT 0x00000014 +#define CP_RB_CNTL__RB_NO_UPDATE__SHIFT 0x0000001b + +#define RB_COLOR_INFO__COLOR_FORMAT__SHIFT 0x00000000 +#define RB_EDRAM_INFO__EDRAM_MAPPING_MODE__SHIFT 0x00000004 +#define RB_EDRAM_INFO__EDRAM_RANGE__SHIFT 0x0000000e + +#define REG_CP_CSQ_IB1_STAT 0x01FE +#define REG_CP_CSQ_IB2_STAT 0x01FF +#define REG_CP_CSQ_RB_STAT 0x01FD +#define REG_CP_DEBUG 0x01FC +#define REG_CP_IB1_BASE 0x0458 +#define REG_CP_IB1_BUFSZ 0x0459 +#define REG_CP_IB2_BASE 0x045A +#define REG_CP_IB2_BUFSZ 0x045B +#define REG_CP_INT_ACK 0x01F4 +#define REG_CP_INT_CNTL 0x01F2 +#define REG_CP_INT_STATUS 0x01F3 +#define REG_CP_ME_CNTL 0x01F6 +#define REG_CP_ME_RAM_DATA 0x01FA +#define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_STATUS 0x01F7 +#define REG_CP_PFP_UCODE_ADDR 0x00C0 +#define REG_CP_PFP_UCODE_DATA 0x00C1 +#define REG_CP_QUEUE_THRESHOLDS 0x01D5 +#define REG_CP_RB_BASE 0x01C0 +#define REG_CP_RB_CNTL 0x01C1 +#define REG_CP_RB_RPTR 0x01C4 +#define REG_CP_RB_RPTR_ADDR 0x01C3 +#define REG_CP_RB_RPTR_WR 0x01C7 +#define REG_CP_RB_WPTR 0x01C5 +#define REG_CP_RB_WPTR_BASE 0x01C8 +#define REG_CP_RB_WPTR_DELAY 0x01C6 +#define REG_CP_STAT 0x047F +#define REG_CP_STATE_DEBUG_DATA 0x01ED +#define REG_CP_STATE_DEBUG_INDEX 0x01EC +#define REG_CP_ST_BASE 0x044D +#define REG_CP_ST_BUFSZ 0x044E + +#define REG_CP_PERFMON_CNTL 0x0444 +#define REG_CP_PERFCOUNTER_SELECT 0x0445 +#define REG_CP_PERFCOUNTER_LO 0x0446 +#define REG_CP_PERFCOUNTER_HI 0x0447 + +#define REG_RBBM_PERFCOUNTER1_SELECT 0x0395 +#define REG_RBBM_PERFCOUNTER1_HI 0x0398 +#define REG_RBBM_PERFCOUNTER1_LO 0x0397 + +#define REG_MASTER_INT_SIGNAL 0x03B7 + +#define REG_PA_CL_VPORT_XSCALE 0x210F +#define REG_PA_CL_VPORT_ZOFFSET 0x2114 +#define REG_PA_CL_VPORT_ZSCALE 0x2113 +#define REG_PA_CL_CLIP_CNTL 0x2204 +#define REG_PA_CL_VTE_CNTL 0x2206 +#define REG_PA_SC_AA_MASK 0x2312 +#define REG_PA_SC_LINE_CNTL 0x2300 +#define REG_PA_SC_SCREEN_SCISSOR_BR 0x200F +#define REG_PA_SC_SCREEN_SCISSOR_TL 0x200E +#define REG_PA_SC_VIZ_QUERY 0x2293 +#define REG_PA_SC_VIZ_QUERY_STATUS 0x0C44 +#define REG_PA_SC_WINDOW_OFFSET 0x2080 +#define REG_PA_SC_WINDOW_SCISSOR_BR 0x2082 +#define REG_PA_SC_WINDOW_SCISSOR_TL 0x2081 +#define REG_PA_SU_FACE_DATA 0x0C86 +#define REG_PA_SU_POINT_SIZE 0x2280 +#define REG_PA_SU_LINE_CNTL 0x2282 +#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET 0x2383 +#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE 0x2380 +#define REG_PA_SU_SC_MODE_CNTL 0x2205 + +#define REG_PC_INDEX_OFFSET 0x2102 + +#define REG_RBBM_CNTL 0x003B +#define REG_RBBM_INT_ACK 0x03B6 +#define REG_RBBM_INT_CNTL 0x03B4 +#define REG_RBBM_INT_STATUS 0x03B5 +#define REG_RBBM_PATCH_RELEASE 0x0001 +#define REG_RBBM_PERIPHID1 0x03F9 +#define 
REG_RBBM_PERIPHID2 0x03FA +#define REG_RBBM_DEBUG 0x039B +#define REG_RBBM_DEBUG_OUT 0x03A0 +#define REG_RBBM_DEBUG_CNTL 0x03A1 +#define REG_RBBM_PM_OVERRIDE1 0x039C +#define REG_RBBM_PM_OVERRIDE2 0x039D +#define REG_RBBM_READ_ERROR 0x03B3 +#define REG_RBBM_SOFT_RESET 0x003C +#define REG_RBBM_STATUS 0x05D0 + +#define REG_RB_COLORCONTROL 0x2202 +#define REG_RB_COLOR_DEST_MASK 0x2326 +#define REG_RB_COLOR_MASK 0x2104 +#define REG_RB_COPY_CONTROL 0x2318 +#define REG_RB_DEPTHCONTROL 0x2200 +#define REG_RB_EDRAM_INFO 0x0F02 +#define REG_RB_MODECONTROL 0x2208 +#define REG_RB_SURFACE_INFO 0x2000 +#define REG_RB_SAMPLE_POS 0x220a + +#define REG_SCRATCH_ADDR 0x01DD +#define REG_SCRATCH_REG0 0x0578 +#define REG_SCRATCH_REG2 0x057A +#define REG_SCRATCH_UMSK 0x01DC + +#define REG_SQ_CF_BOOLEANS 0x4900 +#define REG_SQ_CF_LOOP 0x4908 +#define REG_SQ_GPR_MANAGEMENT 0x0D00 +#define REG_SQ_FLOW_CONTROL 0x0D01 +#define REG_SQ_INST_STORE_MANAGMENT 0x0D02 +#define REG_SQ_INT_ACK 0x0D36 +#define REG_SQ_INT_CNTL 0x0D34 +#define REG_SQ_INT_STATUS 0x0D35 +#define REG_SQ_PROGRAM_CNTL 0x2180 +#define REG_SQ_PS_PROGRAM 0x21F6 +#define REG_SQ_VS_PROGRAM 0x21F7 +#define REG_SQ_WRAPPING_0 0x2183 +#define REG_SQ_WRAPPING_1 0x2184 + +#define REG_VGT_ENHANCE 0x2294 +#define REG_VGT_INDX_OFFSET 0x2102 +#define REG_VGT_MAX_VTX_INDX 0x2100 +#define REG_VGT_MIN_VTX_INDX 0x2101 + +#define REG_TP0_CHICKEN 0x0E1E +#define REG_TC_CNTL_STATUS 0x0E00 +#define REG_PA_SC_AA_CONFIG 0x2301 +#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL 0x2316 +#define REG_SQ_INTERPOLATOR_CNTL 0x2182 +#define REG_RB_DEPTH_INFO 0x2002 +#define REG_COHER_DEST_BASE_0 0x2006 +#define REG_RB_FOG_COLOR 0x2109 +#define REG_RB_STENCILREFMASK_BF 0x210C +#define REG_PA_SC_LINE_STIPPLE 0x2283 +#define REG_SQ_PS_CONST 0x2308 +#define REG_RB_DEPTH_CLEAR 0x231D +#define REG_RB_SAMPLE_COUNT_CTL 0x2324 +#define REG_SQ_CONSTANT_0 0x4000 +#define REG_SQ_FETCH_0 0x4800 + +#define REG_COHER_BASE_PM4 0xA2A +#define REG_COHER_STATUS_PM4 0xA2B +#define REG_COHER_SIZE_PM4 0xA29 + +/*registers added in adreno220*/ +#define REG_A220_PC_INDX_OFFSET REG_VGT_INDX_OFFSET +#define REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL REG_VGT_VERTEX_REUSE_BLOCK_CNTL +#define REG_A220_PC_MAX_VTX_INDX REG_VGT_MAX_VTX_INDX +#define REG_A220_RB_LRZ_VSC_CONTROL 0x2209 +#define REG_A220_GRAS_CONTROL 0x2210 +#define REG_A220_VSC_BIN_SIZE 0x0C01 +#define REG_A220_VSC_PIPE_DATA_LENGTH_7 0x0C1D + +/*registers added in adreno225*/ +#define REG_A225_RB_COLOR_INFO3 0x2005 +#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103 +#define REG_A225_GRAS_UCP0X 0x2340 +#define REG_A225_GRAS_UCP_ENABLED 0x2360 + +#endif /* __A200_REG_H */ diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c new file mode 100644 index 00000000..064b05e9 --- /dev/null +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -0,0 +1,1607 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "adreno.h" + +/* + * + * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers + * (34.5KB) + * + * +---------------------+------------+-------------+---+---------------------+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow | + * +---------------------+------------+-------------+---+---------------------+ + * ________________________________/ \____________________ + * / | + * +--------------+-----------+------+-----------+------------------------+ + * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused | + * +--------------+-----------+------+-----------+------------------------+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 4K - Command and Vertex Buffers + * - Indirect command buffer : Const/Reg restore + * - includes Loop & Bool const shadows + * - Indirect command buffer : Const/Reg save + * - Quad vertices & texture coordinates + * - Indirect command buffer : Gmem save + * - Indirect command buffer : Gmem restore + * - Unused (padding to 8KB boundary) + * <1K - Texture Constant Shadow (768 bytes) (8K aligned) + * 18K - Shader Instruction Shadow + * - 6K vertex (32 byte aligned) + * - 6K pixel (32 byte aligned) + * - 6K shared (32 byte aligned) + * + * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes + * 3 DWORDS per DWORD transfered, plus 1 DWORD for the shadow, for a total of + * 16 bytes per constant. If the texture constants were transfered this way, + * the Command & Vertex Buffers section would extend past the 16K boundary. + * By moving the texture constant shadow area to start at 16KB boundary, we + * only require approximately 40 bytes more memory, but are able to use the + * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up + * context switching. + * + * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool + * constants would require an additional 8KB each, for alignment.] 
+ * + */ + +/* Constants */ + +#define ALU_CONSTANTS 2048 /* DWORDS */ +#define NUM_REGISTERS 1024 /* DWORDS */ +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES +#define CMD_BUFFER_LEN 9216 /* DWORDS */ +#else +#define CMD_BUFFER_LEN 3072 /* DWORDS */ +#endif +#define TEX_CONSTANTS (32*6) /* DWORDS */ +#define BOOL_CONSTANTS 8 /* DWORDS */ +#define LOOP_CONSTANTS 56 /* DWORDS */ +#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ + +/* 96-bit instructions */ +#define SHADER_INSTRUCT (1< sys) */ + +/* pre-compiled vertex shader program +* +* attribute vec4 P; +* void main(void) +* { +* gl_Position = P; +* } +*/ +#define GMEM2SYS_VTX_PGM_LEN 0x12 + +static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = { + 0x00011003, 0x00001000, 0xc2000000, + 0x00001004, 0x00001000, 0xc4000000, + 0x00001005, 0x00002000, 0x00000000, + 0x1cb81000, 0x00398a88, 0x00000003, + 0x140f803e, 0x00000000, 0xe2010100, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* pre-compiled fragment shader program +* +* precision highp float; +* uniform vec4 clear_color; +* void main(void) +* { +* gl_FragColor = clear_color; +* } +*/ + +#define GMEM2SYS_FRAG_PGM_LEN 0x0c + +static unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = { + 0x00000000, 0x1002c400, 0x10000000, + 0x00001003, 0x00002000, 0x00000000, + 0x140f8000, 0x00000000, 0x22000000, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* context restore (sys -> gmem) */ +/* pre-compiled vertex shader program +* +* attribute vec4 position; +* attribute vec4 texcoord; +* varying vec4 texcoord0; +* void main() +* { +* gl_Position = position; +* texcoord0 = texcoord; +* } +*/ + +#define SYS2GMEM_VTX_PGM_LEN 0x18 + +static unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = { + 0x00052003, 0x00001000, 0xc2000000, 0x00001005, + 0x00001000, 0xc4000000, 0x00001006, 0x10071000, + 0x20000000, 0x18981000, 0x0039ba88, 0x00000003, + 0x12982000, 0x40257b08, 0x00000002, 0x140f803e, + 0x00000000, 0xe2010100, 0x140f8000, 0x00000000, + 0xe2020200, 0x14000000, 0x00000000, 0xe2000000 +}; + +/* pre-compiled fragment shader program +* +* precision mediump float; +* uniform sampler2D tex0; +* varying vec4 texcoord0; +* void main() +* { +* gl_FragColor = texture2D(tex0, texcoord0.xy); +* } +*/ + +#define SYS2GMEM_FRAG_PGM_LEN 0x0f + +static unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = { + 0x00011002, 0x00001000, 0xc4000000, 0x00001003, + 0x10041000, 0x20000000, 0x10000001, 0x1ffff688, + 0x00000002, 0x140f8000, 0x00000000, 0xe2000000, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* shader texture constants (sysmem -> gmem) */ +#define SYS2GMEM_TEX_CONST_LEN 6 + +static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = { + /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, + * RFMode=ZeroClamp-1, Dim=1:2d + */ + 0x00000002, /* Pitch = TBD */ + + /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, + * NearestClamp=1:OGL Mode + */ + 0x00000800, /* Address[31:12] = TBD */ + + /* Width, Height, EndianSwap=0:None */ + 0, /* Width & Height = TBD */ + + /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, + * Mip=2:BaseMap + */ + 0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23, + + /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, + * Dim3d=0 + */ + 0, + + /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, + * Dim=1:2d, MipPacking=0 + */ + 1 << 9 /* Mip Address[31:12] = TBD */ +}; + +#define NUM_COLOR_FORMATS 13 + +static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = { + FMT_4_4_4_4, /* 
COLORX_4_4_4_4 */ + FMT_1_5_5_5, /* COLORX_1_5_5_5 */ + FMT_5_6_5, /* COLORX_5_6_5 */ + FMT_8, /* COLORX_8 */ + FMT_8_8, /* COLORX_8_8 */ + FMT_8_8_8_8, /* COLORX_8_8_8_8 */ + FMT_8_8_8_8, /* COLORX_S8_8_8_8 */ + FMT_16_FLOAT, /* COLORX_16_FLOAT */ + FMT_16_16_FLOAT, /* COLORX_16_16_FLOAT */ + FMT_16_16_16_16_FLOAT, /* COLORX_16_16_16_16_FLOAT */ + FMT_32_FLOAT, /* COLORX_32_FLOAT */ + FMT_32_32_FLOAT, /* COLORX_32_32_FLOAT */ + FMT_32_32_32_32_FLOAT, /* COLORX_32_32_32_32_FLOAT */ +}; + +static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = { + 2, /* COLORX_4_4_4_4 */ + 2, /* COLORX_1_5_5_5 */ + 2, /* COLORX_5_6_5 */ + 1, /* COLORX_8 */ + 2, /* COLORX_8_8 8*/ + 4, /* COLORX_8_8_8_8 */ + 4, /* COLORX_S8_8_8_8 */ + 2, /* COLORX_16_FLOAT */ + 4, /* COLORX_16_16_FLOAT */ + 8, /* COLORX_16_16_16_16_FLOAT */ + 4, /* COLORX_32_FLOAT */ + 8, /* COLORX_32_32_FLOAT */ + 16, /* COLORX_32_32_32_32_FLOAT */ +}; + +/* shader linkage info */ +#define SHADER_CONST_ADDR (11 * 6 + 3) + + +static unsigned int *program_shader(unsigned int *cmds, int vtxfrag, + unsigned int *shader_pgm, int dwords) +{ + /* load the patched vertex shader stream */ + *cmds++ = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords); + /* 0=vertex shader, 1=fragment shader */ + *cmds++ = vtxfrag; + /* instruction start & size (in 32-bit words) */ + *cmds++ = ((0 << 16) | dwords); + + memcpy(cmds, shader_pgm, dwords << 2); + cmds += dwords; + + return cmds; +} + +static unsigned int *reg_to_mem(unsigned int *cmds, uint32_t dst, + uint32_t src, int dwords) +{ + while (dwords-- > 0) { + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = src++; + *cmds++ = dst; + dst += 4; + } + + return cmds; +} + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + +static void build_reg_to_mem_range(unsigned int start, unsigned int end, + unsigned int **cmd, + struct adreno_context *drawctxt) +{ + unsigned int i = start; + + for (i = start; i <= end; i++) { + *(*cmd)++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *(*cmd)++ = i; + *(*cmd)++ = + ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) + + (i - 0x2000) * 4; + } +} + +#endif + +/* chicken restore */ +static unsigned int *build_chicken_restore_cmds( + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmds = start; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate); + *cmds++ = 0x00000000; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds); + + return cmds; +} + +/****************************************************************************/ +/* context save */ +/****************************************************************************/ + +static const unsigned int register_ranges_a20x[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_VGT_MAX_VTX_INDX, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL, + REG_PA_SU_POINT_SIZE, REG_PA_SC_LINE_STIPPLE, + REG_PA_SC_VIZ_QUERY, REG_PA_SC_VIZ_QUERY, + REG_VGT_VERTEX_REUSE_BLOCK_CNTL, 
REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a220[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A220_PC_INDX_OFFSET, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a225[] = { + REG_RB_SURFACE_INFO, REG_A225_RB_COLOR_INFO3, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A225_PC_MULTI_PRIM_IB_RESET_INDX, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR, + REG_A225_GRAS_UCP0X, REG_A225_GRAS_UCP_ENABLED +}; + + +/* save h/w regs, alu constants, texture contants, etc. ... +* requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr +*/ +static void build_regsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Make sure the HW context has the correct register values + * before reading them. */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + + { + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + /* Based on chip id choose the register ranges */ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + /* Write HW registers into shadow */ + for (i = 0; i < (reg_array_size/2) ; i++) { + build_reg_to_mem_range(ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1], + &cmd, drawctxt); + } + } + + /* Copy ALU constants */ + cmd = + reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, + REG_SQ_CONSTANT_0, ALU_CONSTANTS); + + /* Copy Tex constants */ + cmd = + reg_to_mem(cmd, + (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, + REG_SQ_FETCH_0, TEX_CONSTANTS); +#else + + /* Insert a wait for idle packet before reading the registers. 
+ * This is to fix a hang/reset seen during stress testing. In this + * hang, CP encountered a timeout reading SQ's boolean constant + * register. There is logic in the HW that blocks reading of this + * register when the SQ block is not idle, which we believe is + * contributing to the hang.*/ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/w registers are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* ALU constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; + *cmd++ = 0 << 16; /* ALU, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* Tex constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; + *cmd++ = 1 << 16; /* Tex, start=0 */ + *cmd++ = 0x0; /* count = 0 */ +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_GPR_MANAGEMENT; + *cmd++ = tmp_ctx.reg_values[0]; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_TP0_CHICKEN; + *cmd++ = tmp_ctx.reg_values[1]; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = i; + *cmd++ = tmp_ctx.reg_values[j]; + j++; + } + } + + /* Copy Boolean constants */ + cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS, + BOOL_CONSTANTS); + + /* Copy Loop constants */ + cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow, + REG_SQ_CF_LOOP, LOOP_CONSTANTS); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/*copy colour, depth, & stencil buffers from graphics memory to system memory*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_save_commands; + unsigned int *start = cmds; + /* Calculate the new offset based on the adjusted base */ + unsigned int bytesperpixel = format2bytesperpixel[shadow->format]; + unsigned int addr = shadow->gmemshadow.gpuaddr; + unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + + /* program shader */ + + /* load shader vtx constants ... 5 dwords */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR; + *cmds++ = 0; + /* valid(?) 
vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* REG_VGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* REG_VGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* REG_VGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00003F00; + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* load the patched vertex shader stream */ + cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x10018001; + else + *cmds++ = 0x10010001; + *cmds++ = 0x00000008; + + /* resolve */ + + /* PA_CL_VTE_CNTL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + /* gmem base assumed 4K aligned. 
*/ + BUG_ON(tmp_ctx.gmem_base & 0xFFF); + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* disable Z */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x08; + else + *cmds++ = 0; + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* Use maximum scissor values -- quad vertices already have the + * correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = (0x1fff << 16) | (0x1fff); + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = (0x1fff << 16) | (0x1fff); + + /* load the viewport so that z scale = clear depth and + * z offset = 0.0f + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; /* -1.0f */ + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + + /* load the COPY state */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(REG_RB_COPY_CONTROL); + *cmds++ = 0; /* RB_COPY_CONTROL */ + *cmds++ = addr & 0xfffff000; /* RB_COPY_DEST_BASE */ + *cmds++ = shadow->pitch >> 5; /* RB_COPY_DEST_PITCH */ + + /* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither, + * MaskWrite:R=G=B=A=1 + */ + *cmds++ = 0x0003c008 | + (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT); + /* Make sure we stay in offsetx field. */ + BUG_ON(offset & 0xfffff000); + *cmds++ = offset; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + *cmds++ = 0x6; /* EDRAM copy */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +/* context restore */ + +/*copy colour, depth, & stencil buffers from system memory to graphics memory*/ +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_restore_commands; + unsigned int *start = cmds; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + /* shader constants */ + + /* vertex buffer constants */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + + *cmds++ = (0x1 << 16) | (9 * 6); + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; + /* limit = 8 dwords */ + *cmds++ = 0x00000020; + *cmds++ = 0; + *cmds++ = 0; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN); + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + *cmds++ = 0x10030002; + *cmds++ = 0x00000008; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + if (!adreno_is_a22x(adreno_dev)) { + /* PA_SC_VIZ_QUERY */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY); + *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */ + } + + /* RB_COLORCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL); + *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */ + *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL); + *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_CONFIG); + *cmds++ = 
0x00000000; /* REG_PA_SC_AA_CONFIG */ + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* texture constants */ + *cmds++ = + cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1)); + *cmds++ = (0x1 << 16) | (0 * 6); + memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2); + cmds[0] |= (shadow->pitch >> 5) << 22; + cmds[1] |= + shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format]; + cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13; + cmds += SYS2GMEM_TEX_CONST_LEN; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* RB_DEPTHCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 8; /* disable Z */ + else + *cmds++ = 0; /* disable Z */ + + /* Use maximum scissor values -- quad vertices already + * have the correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = ((0x1fff) << 16) | 0x1fff; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = ((0x1fff) << 16) | 0x1fff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /*load the viewport so that z scale = clear depth and z offset = 0.0f */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + /* draw pixels with color and depth/stencil component */ + *cmds++ = 0x4; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/W Registers */ + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + /* Based on chip id choose the registers ranges*/ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + for (i = 0; i < (reg_array_size/2); i++) { + cmd = reg_range(cmd, ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1]); + } + + /* Now we know how many register blocks we have, we can compute command + * length + */ + start[2] = + cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); + /* Enable shadowing for the entire register block. */ +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + start[4] |= (1 << 24) | (4 << 16); +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); + tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00040400; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + *cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type0_packet(i, 1); + tmp_ctx.reg_values[j] = virt2gpu(cmd, + &drawctxt->gpustate); + *cmd++ = 0x00000000; + j++; + } + } + + /* ALU Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */ +#else + *cmd++ = (1 << 24) | (0 << 16) | 0; +#endif + *cmd++ = ALU_CONSTANTS; + + /* Texture Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Disable shadowing */ + *cmd++ = (0 << 24) | (1 << 16) | 0; +#else + *cmd++ = (1 << 24) | (1 << 16) | 0; +#endif + *cmd++ = TEX_CONSTANTS; + + /* Boolean Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS); + *cmd++ = (2 << 16) | 0; + + /* the next BOOL_CONSTANT dwords is the shadow area for + * boolean constants. 
+ */ + tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += BOOL_CONSTANTS; + + /* Loop Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS); + *cmd++ = (3 << 16) | 0; + + /* the next LOOP_CONSTANTS dwords is the shadow area for + * loop constants. + */ + tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += LOOP_CONSTANTS; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static void +build_shader_save_restore_cmds(struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *save, *restore, *fixup; + unsigned int *startSizeVtx, *startSizePix, *startSizeShared; + unsigned int *partition1; + unsigned int *shaderBases, *partition2; + + /* compute vertex, pixel and shared instruction shadow GPU addresses */ + tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; + tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + + /* restore shader partitioning and instructions */ + + restore = cmd; /* start address */ + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + /* Restore previous shader vertex & pixel instruction bases. */ + *cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + partition1 = cmd++; /* TBD #4a: partition info (from save) */ + + /* load vertex shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + startSizeVtx = cmd++; /* TBD #1: start/size (from save) */ + + /* load pixel shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + startSizePix = cmd++; /* TBD #2: start/size (from save) */ + + /* load shared shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + startSizeShared = cmd++; /* TBD #3: start/size (from save) */ + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); + + /* + * fixup SET_SHADER_BASES data + * + * since self-modifying PM4 code is being used here, a seperate + * command buffer is used for this fixup operation, to ensure the + * commands are not read by the PM4 engine before the data fields + * have been written. + */ + + fixup = cmd; /* start address */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1); + partition2 = cmd++; /* TBD #4b: partition info (from save) */ + + /* mask off unused bits, then OR with shader instruction memory size */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = REG_SCRATCH_REG2; + /* AND off invalid bits. 
*/ + *cmd++ = 0x0FFF0FFF; + /* OR in instruction memory size */ + *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + + /* write the computed value to the SET_SHADER_BASES data field */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SCRATCH_REG2; + /* TBD #5: shader bases (to restore) */ + *cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); + + /* save shader partitioning and instructions */ + + save = cmd; /* start address */ + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* fetch the SQ_INST_STORE_MANAGMENT register value, + * store the value in the data fields of the SET_CONSTANT commands + * above. + */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4a: partition info (to restore) */ + *cmd++ = virt2gpu(partition1, &drawctxt->gpustate); + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4b: partition info (to fixup) */ + *cmd++ = virt2gpu(partition2, &drawctxt->gpustate); + + + /* store the vertex shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + /* TBD #1: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate); + + /* store the pixel shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + /* TBD #2: start/size (to restore) */ + *cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate); + + /* store the shared shader instructions if vertex base is nonzero */ + + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + /* TBD #3: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate); + + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, save, cmd); + + tmp_ctx.cmd = cmd; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + /* Allocate vmalloc memory to store the gpustate */ + result = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (result) + return result; + + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + /* Blank out h/w register, constant, and command buffer shadows. 
*/ + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + + /* set-up command and vertex buffer pointers */ + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + /* build indirect command buffers to save & restore regs/constants */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + build_regrestore_cmds(adreno_dev, drawctxt); + build_regsave_cmds(adreno_dev, drawctxt); + + build_shader_save_restore_cmds(drawctxt); + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, drawctxt->context_gmem_shadow.size); + + if (result) + return result; + + /* we've allocated the shadow, when swapped out, GMEM must be saved. */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + + /* blank out gmem shadow. */ + kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, + drawctxt->context_gmem_shadow.size); + + /* build quad vertex buffer */ + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* build TP0_CHICKEN register restore command buffer */ + tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); + + /* build indirect command buffers to save & restore gmem */ + /* Idle because we are reading PM override registers */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + drawctxt->context_gmem_shadow.gmem_restore_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, + &drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->context_gmem_shadow.gmemshadow.gpuaddr, + drawctxt->context_gmem_shadow.gmemshadow.size, false); + + return 0; +} + +static void a2xx_ctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + KGSL_CTXT_INFO(device, + "active context flags %08x\n", context->flags); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* save shader partitioning and instructions. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->shader_save, 3); + + /* fixup shader partitioning parameter for + * SET_SHADER_BASES. + */ + adreno_ringbuffer_issuecmds(device, 0, + context->shader_fixup, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* save gmem. + * (note: changes shader. shader must already be saved.) 
+ */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_save, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default apgetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); + cmds[4] = (unsigned int) context; + adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + +#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->gpustate.gpuaddr, LCC_SHADOW_SIZE + + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false); +#endif + + /* restore gmem. + * (note: changes shader. shader must not already be restored.) + */ + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_restore, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + /* restore registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, + context->reg_restore, 3); + + /* restore shader instructions & partitioning. */ + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { + adreno_ringbuffer_issuecmds(device, 0, + context->shader_restore, 3); + } + + if (adreno_is_a20x(adreno_dev)) { + cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); + cmds[1] = context->bin_base_offset; + adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + } +} + +/* + * Interrupt management + * + * a2xx interrupt control is distributed among the various + * hardware components (RB, CP, MMU). The main interrupt + * tells us which component fired the interrupt, but one needs + * to go to the individual component to find out why. 
The + * following functions provide the broken out support for + * managing the interrupts + */ + +#define RBBM_INT_MASK RBBM_INT_CNTL__RDERR_INT_MASK + +#define CP_INT_MASK \ + (CP_INT_CNTL__T0_PACKET_IN_IB_MASK | \ + CP_INT_CNTL__OPCODE_ERROR_MASK | \ + CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK | \ + CP_INT_CNTL__RESERVED_BIT_ERROR_MASK | \ + CP_INT_CNTL__IB_ERROR_MASK | \ + CP_INT_CNTL__IB1_INT_MASK | \ + CP_INT_CNTL__RB_INT_MASK) + +#define VALID_STATUS_COUNT_MAX 10 + +static struct { + unsigned int mask; + const char *message; +} kgsl_cp_error_irqs[] = { + { CP_INT_CNTL__T0_PACKET_IN_IB_MASK, + "ringbuffer TO packet in IB interrupt" }, + { CP_INT_CNTL__OPCODE_ERROR_MASK, + "ringbuffer opcode error interrupt" }, + { CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK, + "ringbuffer protected mode error interrupt" }, + { CP_INT_CNTL__RESERVED_BIT_ERROR_MASK, + "ringbuffer reserved bit error interrupt" }, + { CP_INT_CNTL__IB_ERROR_MASK, + "ringbuffer IB error interrupt" }, +}; + +static void a2xx_cp_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0, num_reads = 0, master_status = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + int i; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &master_status); + while (!status && (num_reads < VALID_STATUS_COUNT_MAX) && + (master_status & MASTER_INT_SIGNAL__CP_INT_STAT)) { + adreno_regread(device, REG_CP_INT_STATUS, &status); + adreno_regread(device, REG_MASTER_INT_SIGNAL, + &master_status); + num_reads++; + } + if (num_reads > 1) + KGSL_DRV_WARN(device, + "Looped %d times to read REG_CP_INT_STATUS\n", + num_reads); + if (!status) { + if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) { + /* This indicates that we could not read CP_INT_STAT. + * As a precaution just wake up processes so + * they can check their timestamps. Since, we + * did not ack any interrupts this interrupt will + * be generated again */ + KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n"); + wake_up_interruptible_all(&device->wait_queue); + } else + KGSL_DRV_WARN(device, "Spurious interrput detected\n"); + return; + } + + if (status & CP_INT_CNTL__RB_INT_MASK) { + /* signal intr completion event */ + unsigned int enableflag = 0; + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + enableflag); + wmb(); + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + for (i = 0; i < ARRAY_SIZE(kgsl_cp_error_irqs); i++) { + if (status & kgsl_cp_error_irqs[i].mask) { + KGSL_CMD_CRIT(rb->device, "%s\n", + kgsl_cp_error_irqs[i].message); + /* + * on fatal errors, turn off the interrupts to + * avoid storming. 
This has the side effect of + * forcing a PM dump when the timestamp times out + */ + + kgsl_pwrctrl_irq(rb->device, KGSL_PWRFLAGS_OFF); + } + } + + /* only ack bits we understand */ + status &= CP_INT_MASK; + adreno_regwrite(device, REG_CP_INT_ACK, status); + + if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) { + KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n"); + queue_work(device->work_queue, &device->ts_expired_ws); + wake_up_interruptible_all(&device->wait_queue); + atomic_notifier_call_chain(&(device->ts_notifier_list), + device->id, + NULL); + } +} + +static void a2xx_rbbm_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0; + unsigned int rderr = 0; + + adreno_regread(device, REG_RBBM_INT_STATUS, &status); + + if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { + union rbbm_read_error_u rerr; + adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); + rerr.val = rderr; + if (rerr.f.read_address == REG_CP_INT_STATUS && + rerr.f.read_error && + rerr.f.read_requester) + KGSL_DRV_WARN(device, + "rbbm read error interrupt: %08x\n", rderr); + else + KGSL_DRV_CRIT(device, + "rbbm read error interrupt: %08x\n", rderr); + } + + status &= RBBM_INT_MASK; + adreno_regwrite(device, REG_RBBM_INT_ACK, status); +} + +irqreturn_t a2xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t result = IRQ_NONE; + unsigned int status; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &status); + + if (status & MASTER_INT_SIGNAL__MH_INT_STAT) { + kgsl_mh_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__CP_INT_STAT) { + a2xx_cp_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__RBBM_INT_STAT) { + a2xx_rbbm_intrcallback(device); + result = IRQ_HANDLED; + } + + return result; +} + +static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) { + adreno_regwrite(device, REG_RBBM_INT_CNTL, RBBM_INT_MASK); + adreno_regwrite(device, REG_CP_INT_CNTL, CP_INT_MASK); + adreno_regwrite(device, MH_INTERRUPT_MASK, KGSL_MMU_INT_MASK); + } else { + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, MH_INTERRUPT_MASK, 0); + } + + /* Force the writes to post before touching the IRQ line */ + wmb(); +} + +struct adreno_gpudev adreno_a2xx_gpudev = { + .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, + .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, + .ctxt_save = a2xx_ctxt_save, + .ctxt_restore = a2xx_ctxt_restore, + .irq_handler = a2xx_irq_handler, + .irq_control = a2xx_irq_control, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c new file mode 100644 index 00000000..9e7ef61d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -0,0 +1,766 @@ +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +#include "adreno_ringbuffer.h" + +static ssize_t +sysfs_show_ptpool_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->entries); +} + +static ssize_t +sysfs_show_ptpool_min(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", + pool->static_entries); +} + +static ssize_t +sysfs_show_ptpool_chunks(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->chunks); +} + +static ssize_t +sysfs_show_ptpool_ptsize(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->ptsize); +} + +static struct kobj_attribute attr_ptpool_entries = { + .attr = { .name = "ptpool_entries", .mode = 0444 }, + .show = sysfs_show_ptpool_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_min = { + .attr = { .name = "ptpool_min", .mode = 0444 }, + .show = sysfs_show_ptpool_min, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_chunks = { + .attr = { .name = "ptpool_chunks", .mode = 0444 }, + .show = sysfs_show_ptpool_chunks, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_ptsize = { + .attr = { .name = "ptpool_ptsize", .mode = 0444 }, + .show = sysfs_show_ptpool_ptsize, + .store = NULL, +}; + +static struct attribute *ptpool_attrs[] = { + &attr_ptpool_entries.attr, + &attr_ptpool_min.attr, + &attr_ptpool_chunks.attr, + &attr_ptpool_ptsize.attr, + NULL, +}; + +static struct attribute_group ptpool_attr_group = { + .attrs = ptpool_attrs, +}; + +static int +_kgsl_ptpool_add_entries(struct kgsl_ptpool *pool, int count, int dynamic) +{ + struct kgsl_ptpool_chunk *chunk; + size_t size = ALIGN(count * pool->ptsize, PAGE_SIZE); + + BUG_ON(count == 0); + + if (get_order(size) >= MAX_ORDER) { + KGSL_CORE_ERR("ptpool allocation is too big: %d\n", size); + return -EINVAL; + } + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (chunk == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*chunk)); + return -ENOMEM; + } + + chunk->size = size; + chunk->count = count; + chunk->dynamic = dynamic; + + chunk->data = dma_alloc_coherent(NULL, size, + &chunk->phys, GFP_KERNEL); + + if (chunk->data == NULL) { + KGSL_CORE_ERR("dma_alloc_coherent(%d) failed\n", size); + goto err; + } + + chunk->bitmap = kzalloc(BITS_TO_LONGS(count) * 4, GFP_KERNEL); + + if (chunk->bitmap == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + BITS_TO_LONGS(count) * 4); + goto err_dma; + } + + list_add_tail(&chunk->list, &pool->list); + + pool->chunks++; + pool->entries += count; + + if (!dynamic) + pool->static_entries += count; + + return 0; + +err_dma: + dma_free_coherent(NULL, chunk->size, chunk->data, chunk->phys); +err: + kfree(chunk); + return -ENOMEM; +} + +static void * +_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr) +{ + struct kgsl_ptpool_chunk *chunk; + + list_for_each_entry(chunk, &pool->list, list) { + int bit = find_first_zero_bit(chunk->bitmap, 
chunk->count); + + if (bit >= chunk->count) + continue; + + set_bit(bit, chunk->bitmap); + *physaddr = chunk->phys + (bit * pool->ptsize); + + return chunk->data + (bit * pool->ptsize); + } + + return NULL; +} + +/** + * kgsl_ptpool_add + * @pool: A pointer to a ptpool structure + * @entries: Number of entries to add + * + * Add static entries to the pagetable pool. + */ + +static int +kgsl_ptpool_add(struct kgsl_ptpool *pool, int count) +{ + int ret = 0; + BUG_ON(count == 0); + + mutex_lock(&pool->lock); + + /* Only 4MB can be allocated in one chunk, so larger allocations + need to be split into multiple sections */ + + while (count) { + int entries = ((count * pool->ptsize) > SZ_4M) ? + SZ_4M / pool->ptsize : count; + + /* Add the entries as static, i.e. they don't ever stand + a chance of being removed */ + + ret = _kgsl_ptpool_add_entries(pool, entries, 0); + if (ret) + break; + + count -= entries; + } + + mutex_unlock(&pool->lock); + return ret; +} + +/** + * kgsl_ptpool_alloc + * @pool: A pointer to a ptpool structure + * @addr: A pointer to store the physical address of the chunk + * + * Allocate a pagetable from the pool. Returns the virtual address + * of the pagetable, the physical address is returned in physaddr + */ + +static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool, + unsigned int *physaddr) +{ + void *addr = NULL; + int ret; + + mutex_lock(&pool->lock); + addr = _kgsl_ptpool_get_entry(pool, physaddr); + if (addr) + goto done; + + /* Add a chunk for 1 more pagetable and mark it as dynamic */ + ret = _kgsl_ptpool_add_entries(pool, 1, 1); + + if (ret) + goto done; + + addr = _kgsl_ptpool_get_entry(pool, physaddr); +done: + mutex_unlock(&pool->lock); + return addr; +} + +static inline void _kgsl_ptpool_rm_chunk(struct kgsl_ptpool_chunk *chunk) +{ + list_del(&chunk->list); + + if (chunk->data) + dma_free_coherent(NULL, chunk->size, chunk->data, + chunk->phys); + kfree(chunk->bitmap); + kfree(chunk); +} + +/** + * kgsl_ptpool_free + * @pool: A pointer to a ptpool structure + * @addr: A pointer to the virtual address to free + * + * Free a pagetable allocated from the pool + */ + +static void kgsl_ptpool_free(struct kgsl_ptpool *pool, void *addr) +{ + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL || addr == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) { + if (addr >= chunk->data && + addr < chunk->data + chunk->size) { + int bit = ((unsigned long) (addr - chunk->data)) / + pool->ptsize; + + clear_bit(bit, chunk->bitmap); + memset(addr, 0, pool->ptsize); + + if (chunk->dynamic && + bitmap_empty(chunk->bitmap, chunk->count)) + _kgsl_ptpool_rm_chunk(chunk); + + break; + } + } + + mutex_unlock(&pool->lock); +} + +void kgsl_gpummu_ptpool_destroy(void *ptpool) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *)ptpool; + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) + _kgsl_ptpool_rm_chunk(chunk); + mutex_unlock(&pool->lock); + + kfree(pool); +} + +/** + * kgsl_ptpool_init + * @pool: A pointer to a ptpool structure to initialize + * @ptsize: The size of each pagetable entry + * @entries: The number of inital entries to add to the pool + * + * Initalize a pool and allocate an initial chunk of entries. 
+ */ +void *kgsl_gpummu_ptpool_init(int ptsize, int entries) +{ + struct kgsl_ptpool *pool; + int ret = 0; + BUG_ON(ptsize == 0); + + pool = kzalloc(sizeof(struct kgsl_ptpool), GFP_KERNEL); + if (!pool) { + KGSL_CORE_ERR("Failed to allocate memory " + "for ptpool\n"); + return NULL; + } + + pool->ptsize = ptsize; + mutex_init(&pool->lock); + INIT_LIST_HEAD(&pool->list); + + if (entries) { + ret = kgsl_ptpool_add(pool, entries); + if (ret) + goto err_ptpool_remove; + } + + ret = sysfs_create_group(kgsl_driver.ptkobj, &ptpool_attr_group); + if (ret) { + KGSL_CORE_ERR("sysfs_create_group failed for ptpool " + "statistics: %d\n", ret); + goto err_ptpool_remove; + } + return (void *)pool; + +err_ptpool_remove: + kgsl_gpummu_ptpool_destroy(pool); + return NULL; +} + +int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + struct kgsl_gpummu_pt *gpummu_pt = pt->priv; + return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); +} + +void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) +{ + struct kgsl_gpummu_pt *gpummu_pt = (struct kgsl_gpummu_pt *) + mmu_specific_pt; + kgsl_ptpool_free((struct kgsl_ptpool *)kgsl_driver.ptpool, + gpummu_pt->base.hostptr); + + kgsl_driver.stats.coherent -= KGSL_PAGETABLE_SIZE; + + kfree(gpummu_pt->tlbflushfilter.base); + + kfree(gpummu_pt); +} + +static inline uint32_t +kgsl_pt_entry_get(unsigned int va_base, uint32_t va) +{ + return (va - va_base) >> PAGE_SHIFT; +} + +static inline void +kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) +{ + uint32_t *baseptr = (uint32_t *)pt->base.hostptr; + + writel_relaxed(val, &baseptr[pte]); +} + +static inline uint32_t +kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) +{ + uint32_t *baseptr = (uint32_t *)pt->base.hostptr; + return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; +} + +static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, + enum kgsl_deviceid id) +{ + unsigned int result = 0; + struct kgsl_gpummu_pt *gpummu_pt; + + if (pt == NULL) + return 0; + gpummu_pt = pt->priv; + + spin_lock(&pt->lock); + if (gpummu_pt->tlb_flags && (1<tlb_flags &= ~(1<lock); + return result; +} + +static void kgsl_gpummu_pagefault(struct kgsl_device *device) +{ + unsigned int reg; + unsigned int ptbase; + + kgsl_regread(device, MH_MMU_PAGE_FAULT, ®); + kgsl_regread(device, MH_MMU_PT_BASE, &ptbase); + + KGSL_MEM_CRIT(device, + "mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n", + reg & ~(PAGE_SIZE - 1), + kgsl_mmu_get_ptname_from_ptbase(ptbase), + reg & 0x02 ? 
"WRITE" : "READ", (reg >> 4) & 0xF); +} + +static void *kgsl_gpummu_create_pagetable(void) +{ + struct kgsl_gpummu_pt *gpummu_pt; + + gpummu_pt = kzalloc(sizeof(struct kgsl_gpummu_pt), + GFP_KERNEL); + if (!gpummu_pt) + return NULL; + + gpummu_pt->tlb_flags = 0; + gpummu_pt->last_superpte = 0; + + gpummu_pt->tlbflushfilter.size = (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE / + (PAGE_SIZE * GSL_PT_SUPER_PTE * 8)) + 1; + gpummu_pt->tlbflushfilter.base = (unsigned int *) + kzalloc(gpummu_pt->tlbflushfilter.size, GFP_KERNEL); + if (!gpummu_pt->tlbflushfilter.base) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + gpummu_pt->tlbflushfilter.size); + goto err_free_gpummu; + } + GSL_TLBFLUSH_FILTER_RESET(); + + gpummu_pt->base.hostptr = kgsl_ptpool_alloc((struct kgsl_ptpool *) + kgsl_driver.ptpool, + &gpummu_pt->base.physaddr); + + if (gpummu_pt->base.hostptr == NULL) + goto err_flushfilter; + + /* ptpool allocations are from coherent memory, so update the + device statistics acordingly */ + + KGSL_STATS_ADD(KGSL_PAGETABLE_SIZE, kgsl_driver.stats.coherent, + kgsl_driver.stats.coherent_max); + + gpummu_pt->base.gpuaddr = gpummu_pt->base.physaddr; + gpummu_pt->base.size = KGSL_PAGETABLE_SIZE; + + return (void *)gpummu_pt; + +err_flushfilter: + kfree(gpummu_pt->tlbflushfilter.base); +err_free_gpummu: + kfree(gpummu_pt); + + return NULL; +} + +static void kgsl_gpummu_default_setstate(struct kgsl_device *device, + uint32_t flags) +{ + struct kgsl_gpummu_pt *gpummu_pt; + if (!kgsl_mmu_enabled()) + return; + + if (flags & KGSL_MMUFLAGS_PTUPDATE) { + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + gpummu_pt = device->mmu.hwpagetable->priv; + kgsl_regwrite(device, MH_MMU_PT_BASE, + gpummu_pt->base.gpuaddr); + } + + if (flags & KGSL_MMUFLAGS_TLBFLUSH) { + /* Invalidate all and tc */ + kgsl_regwrite(device, MH_MMU_INVALIDATE, 0x00000003); + } +} + +static void kgsl_gpummu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) +{ + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_gpummu_pt *gpummu_pt; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* page table not current, then setup mmu to use new + * specified page table + */ + if (mmu->hwpagetable != pagetable) { + mmu->hwpagetable = pagetable; + spin_lock(&mmu->hwpagetable->lock); + gpummu_pt = mmu->hwpagetable->priv; + gpummu_pt->tlb_flags &= ~(1<id); + spin_unlock(&mmu->hwpagetable->lock); + + /* call device specific set page table */ + kgsl_setstate(mmu->device, KGSL_MMUFLAGS_TLBFLUSH | + KGSL_MMUFLAGS_PTUPDATE); + } + } +} + +static int kgsl_gpummu_init(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + + mmu->device = device; + + /* sub-client MMU lookups require address translation */ + if ((mmu->config & ~0x1) > 0) { + /*make sure virtual address range is a multiple of 64Kb */ + if (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE & ((1 << 16) - 1)) { + KGSL_CORE_ERR("Invalid pagetable size requested " + "for GPUMMU: %x\n", CONFIG_MSM_KGSL_PAGE_TABLE_SIZE); + return -EINVAL; + } + + /* allocate memory used for completing r/w operations that + * cannot be mapped by the MMU + */ + status = kgsl_allocate_contiguous(&mmu->setstate_memory, 64); + if (!status) + kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + } + + dev_info(device->dev, "|%s| MMU type set for device is GPUMMU\n", + __func__); + return status; +} + +static int kgsl_gpummu_start(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with 
the global lock held + */ + + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_gpummu_pt *gpummu_pt; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + /* MMU not enabled */ + if ((mmu->config & 0x1) == 0) + return 0; + + /* setup MMU and sub-client behavior */ + kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config); + + /* idle device */ + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + + /* enable axi interrupts */ + kgsl_regwrite(device, MH_INTERRUPT_MASK, + GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT); + + kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + + /* TRAN_ERROR needs a 32 byte (32 byte aligned) chunk of memory + * to complete transactions in case of an MMU fault. Note that + * we'll leave the bottom 32 bytes of the setstate_memory for other + * purposes (e.g. use it when dummy read cycles are needed + * for other blocks) */ + kgsl_regwrite(device, MH_MMU_TRAN_ERROR, + mmu->setstate_memory.physaddr + 32); + + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + + mmu->hwpagetable = mmu->defaultpagetable; + gpummu_pt = mmu->hwpagetable->priv; + kgsl_regwrite(device, MH_MMU_PT_BASE, + gpummu_pt->base.gpuaddr); + kgsl_regwrite(device, MH_MMU_VA_RANGE, + (KGSL_PAGETABLE_BASE | + (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE >> 16))); + kgsl_setstate(device, KGSL_MMUFLAGS_TLBFLUSH); + mmu->flags |= KGSL_FLAGS_STARTED; + + return 0; +} + +static int +kgsl_gpummu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + unsigned int numpages; + unsigned int pte, ptefirst, ptelast, superpte; + unsigned int range = memdesc->size; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + numpages = (range >> PAGE_SHIFT); + if (range & (PAGE_SIZE - 1)) + numpages++; + + ptefirst = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, gpuaddr); + ptelast = ptefirst + numpages; + + superpte = ptefirst - (ptefirst & (GSL_PT_SUPER_PTE-1)); + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / GSL_PT_SUPER_PTE); + for (pte = ptefirst; pte < ptelast; pte++) { +#ifdef VERBOSE_DEBUG + /* check if PTE exists */ + if (!kgsl_pt_map_get(gpummu_pt, pte)) + KGSL_CORE_ERR("pt entry %x is already " + "unmapped for pagetable %p\n", pte, gpummu_pt); +#endif + kgsl_pt_map_set(gpummu_pt, pte, GSL_PT_PAGE_DIRTY); + superpte = pte - (pte & (GSL_PT_SUPER_PTE - 1)); + if (pte == superpte) + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / + GSL_PT_SUPER_PTE); + } + + /* Post all writes to the pagetable */ + wmb(); + + return 0; +} + +#define SUPERPTE_IS_DIRTY(_p) \ +(((_p) & (GSL_PT_SUPER_PTE - 1)) == 0 && \ +GSL_TLBFLUSH_FILTER_ISDIRTY((_p) / GSL_PT_SUPER_PTE)) + +static int +kgsl_gpummu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + unsigned int pte; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + struct scatterlist *s; + int flushtlb = 0; + int i; + + pte = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, memdesc->gpuaddr); + + /* Flush the TLB if the first PTE isn't at the superpte boundary */ + if (pte & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + for_each_sg(memdesc->sg, s, memdesc->sglen, i) { + unsigned int paddr = sg_phys(s); + unsigned int j; + + /* Each 
sg entry might be multiple pages long */ + for (j = paddr; j < paddr + s->length; pte++, j += PAGE_SIZE) { + if (SUPERPTE_IS_DIRTY(pte)) + flushtlb = 1; + kgsl_pt_map_set(gpummu_pt, pte, j | protflags); + } + } + + /* Flush the TLB if the last PTE isn't at the superpte boundary */ + if ((pte + 1) & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + wmb(); + + if (flushtlb) { + /*set all devices as needing flushing*/ + gpummu_pt->tlb_flags = UINT_MAX; + GSL_TLBFLUSH_FILTER_RESET(); + } + + return 0; +} + +static int kgsl_gpummu_stop(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + mmu->flags &= ~KGSL_FLAGS_STARTED; + + return 0; +} + +static int kgsl_gpummu_close(struct kgsl_device *device) +{ + /* + * close device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->setstate_memory.gpuaddr) + kgsl_sharedmem_free(&mmu->setstate_memory); + + if (mmu->defaultpagetable) + kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_gpummu_get_current_ptbase(struct kgsl_device *device) +{ + unsigned int ptbase; + kgsl_regread(device, MH_MMU_PT_BASE, &ptbase); + return ptbase; +} + +struct kgsl_mmu_ops gpummu_ops = { + .mmu_init = kgsl_gpummu_init, + .mmu_close = kgsl_gpummu_close, + .mmu_start = kgsl_gpummu_start, + .mmu_stop = kgsl_gpummu_stop, + .mmu_setstate = kgsl_gpummu_setstate, + .mmu_device_setstate = kgsl_gpummu_default_setstate, + .mmu_pagefault = kgsl_gpummu_pagefault, + .mmu_get_current_ptbase = kgsl_gpummu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops gpummu_pt_ops = { + .mmu_map = kgsl_gpummu_map, + .mmu_unmap = kgsl_gpummu_unmap, + .mmu_create_pagetable = kgsl_gpummu_create_pagetable, + .mmu_destroy_pagetable = kgsl_gpummu_destroy_pagetable, + .mmu_pt_equal = kgsl_gpummu_pt_equal, + .mmu_pt_get_flags = kgsl_gpummu_pt_get_flags, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h new file mode 100644 index 00000000..46466a8d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_GPUMMU_H +#define __KGSL_GPUMMU_H + +#define GSL_PT_PAGE_BITS_MASK 0x00000007 +#define GSL_PT_PAGE_ADDR_MASK PAGE_MASK + +#define GSL_MMU_INT_MASK \ + (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ + MH_INTERRUPT_MASK__AXI_WRITE_ERROR) + +/* Macros to manage TLB flushing */ +#define GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS (sizeof(unsigned char) * 8) +#define GSL_TLBFLUSH_FILTER_GET(superpte) \ + (*((unsigned char *) \ + (((unsigned int)gpummu_pt->tlbflushfilter.base) \ + + (superpte / GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)))) +#define GSL_TLBFLUSH_FILTER_SETDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) |= 1 << \ + (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)) +#define GSL_TLBFLUSH_FILTER_ISDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) & \ + (1 << (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS))) +#define GSL_TLBFLUSH_FILTER_RESET() memset(gpummu_pt->tlbflushfilter.base,\ + 0, gpummu_pt->tlbflushfilter.size) + +extern struct kgsl_mmu_ops gpummu_ops; +extern struct kgsl_mmu_pt_ops gpummu_pt_ops; + +struct kgsl_tlbflushfilter { + unsigned int *base; + unsigned int size; +}; + +struct kgsl_gpummu_pt { + struct kgsl_memdesc base; + unsigned int last_superpte; + unsigned int tlb_flags; + /* Maintain filter to manage tlb flushing */ + struct kgsl_tlbflushfilter tlbflushfilter; +}; + +struct kgsl_ptpool_chunk { + size_t size; + unsigned int count; + int dynamic; + + void *data; + unsigned int phys; + + unsigned long *bitmap; + struct list_head list; +}; + +struct kgsl_ptpool { + size_t ptsize; + struct mutex lock; + struct list_head list; + int entries; + int static_entries; + int chunks; +}; + +void *kgsl_gpummu_ptpool_init(int ptsize, + int entries); +void kgsl_gpummu_ptpool_destroy(void *ptpool); + +static inline unsigned int kgsl_pt_get_base_addr(struct kgsl_pagetable *pt) +{ + struct kgsl_gpummu_pt *gpummu_pt = pt->priv; + return gpummu_pt->base.gpuaddr; +} +#endif /* __KGSL_GPUMMU_H */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c new file mode 100644 index 00000000..30365a3c --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -0,0 +1,333 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" + +struct kgsl_iommu { + struct device *iommu_user_dev; + int iommu_user_dev_attached; + struct device *iommu_priv_dev; + int iommu_priv_dev_attached; +}; + +static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + struct iommu_domain *domain = pt->priv; + return pt && pt_base && ((unsigned int)domain == pt_base); +} + +static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) +{ + struct iommu_domain *domain = mmu_specific_pt; + if (domain) + iommu_domain_free(domain); +} + +void *kgsl_iommu_create_pagetable(void) +{ + struct iommu_domain *domain = iommu_domain_alloc(0); + if (!domain) + KGSL_CORE_ERR("Failed to create iommu domain\n"); + + return domain; +} + +static void kgsl_detach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev_attached) { + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev_attached) { + iommu_detach_device(domain, iommu->iommu_priv_dev); + iommu->iommu_priv_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from priv dev of MMU: %p\n", + domain, mmu); + } +} + +static int kgsl_attach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + int ret = 0; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev && !iommu->iommu_user_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_user_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + goto done; + } + iommu->iommu_user_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev && !iommu->iommu_priv_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_priv_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + goto done; + } + iommu->iommu_priv_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to priv dev of MMU: %p\n", + domain, mmu); + } +done: + return ret; +} + +static int kgsl_get_iommu_ctxt(struct kgsl_iommu *iommu, + struct kgsl_device *device) +{ + int status = 0; + struct platform_device *pdev = + container_of(device->parentdev, struct platform_device, dev); + struct kgsl_device_platform_data *pdata_dev = pdev->dev.platform_data; + if (pdata_dev->iommu_user_ctx_name) + iommu->iommu_user_dev = msm_iommu_get_ctx( + pdata_dev->iommu_user_ctx_name); + if (pdata_dev->iommu_priv_ctx_name) + iommu->iommu_priv_dev = msm_iommu_get_ctx( + pdata_dev->iommu_priv_ctx_name); + if (!iommu->iommu_user_dev) { + KGSL_CORE_ERR("Failed to get user iommu dev handle for " + "device %s\n", + pdata_dev->iommu_user_ctx_name); + status = -EINVAL; + } + return status; +} + +static void kgsl_iommu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) +{ + 
struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* page table not current, then setup mmu to use new + * specified page table + */ + if (mmu->hwpagetable != pagetable) { + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + kgsl_detach_pagetable_iommu_domain(mmu); + mmu->hwpagetable = pagetable; + if (mmu->hwpagetable) + kgsl_attach_pagetable_iommu_domain(mmu); + } + } +} + +static int kgsl_iommu_init(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_iommu *iommu; + + mmu->device = device; + + iommu = kzalloc(sizeof(struct kgsl_iommu), GFP_KERNEL); + if (!iommu) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(struct kgsl_iommu)); + return -ENOMEM; + } + + iommu->iommu_priv_dev_attached = 0; + iommu->iommu_user_dev_attached = 0; + status = kgsl_get_iommu_ctxt(iommu, device); + if (status) { + kfree(iommu); + iommu = NULL; + } + mmu->priv = iommu; + + dev_info(device->dev, "|%s| MMU type set for device is IOMMU\n", + __func__); + return status; +} + +static int kgsl_iommu_start(struct kgsl_device *device) +{ + int status; + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + mmu->hwpagetable = mmu->defaultpagetable; + + status = kgsl_attach_pagetable_iommu_domain(mmu); + if (!status) + mmu->flags |= KGSL_FLAGS_STARTED; + + return status; +} + +static int +kgsl_iommu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + int ret; + unsigned int range = memdesc->size; + struct iommu_domain *domain = (struct iommu_domain *) + mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + if (range == 0 || gpuaddr == 0) + return 0; + + ret = iommu_unmap_range(domain, gpuaddr, range); + if (ret) + KGSL_CORE_ERR("iommu_unmap_range(%p, %x, %d) failed " + "with err: %d\n", domain, gpuaddr, + range, ret); + + return 0; +} + +static int +kgsl_iommu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + int ret; + unsigned int iommu_virt_addr; + struct iommu_domain *domain = mmu_specific_pt; + + BUG_ON(NULL == domain); + + + iommu_virt_addr = memdesc->gpuaddr; + + ret = iommu_map_range(domain, iommu_virt_addr, memdesc->sg, + memdesc->size, MSM_IOMMU_ATTR_NONCACHED); + if (ret) { + KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) " + "failed with err: %d\n", domain, + iommu_virt_addr, memdesc->sg, memdesc->size, + MSM_IOMMU_ATTR_NONCACHED, ret); + return ret; + } + + return ret; +} + +static int kgsl_iommu_stop(struct kgsl_device *device) +{ + /* + * stop device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* detach iommu attachment */ + kgsl_detach_pagetable_iommu_domain(mmu); + + mmu->flags &= ~KGSL_FLAGS_STARTED; + } + + return 0; +} + +static int kgsl_iommu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + if (mmu->defaultpagetable) + 
kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_iommu_get_current_ptbase(struct kgsl_device *device) +{ + /* Current base is always the hwpagetables domain as we + * do not use per process pagetables right not for iommu. + * This will change when we switch to per process pagetables. + */ + return (unsigned int)device->mmu.hwpagetable->priv; +} + +struct kgsl_mmu_ops iommu_ops = { + .mmu_init = kgsl_iommu_init, + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_stop = kgsl_iommu_stop, + .mmu_setstate = kgsl_iommu_setstate, + .mmu_device_setstate = NULL, + .mmu_pagefault = NULL, + .mmu_get_current_ptbase = kgsl_iommu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops iommu_pt_ops = { + .mmu_map = kgsl_iommu_map, + .mmu_unmap = kgsl_iommu_unmap, + .mmu_create_pagetable = kgsl_iommu_create_pagetable, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .mmu_pt_equal = kgsl_iommu_pt_equal, + .mmu_pt_get_flags = NULL, +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_idlestats.c b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c new file mode 100644 index 00000000..d5fa84ed --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c @@ -0,0 +1,221 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define MAX_CORES 4 +struct _cpu_info { + spinlock_t lock; + struct notifier_block cpu_nb; + u64 start[MAX_CORES]; + u64 end[MAX_CORES]; + int curr_freq[MAX_CORES]; + int max_freq[MAX_CORES]; +}; + +struct idlestats_priv { + char name[32]; + struct msm_idle_stats_device idledev; + struct kgsl_device *device; + struct msm_idle_pulse pulse; + struct _cpu_info cpu_info; +}; + +static int idlestats_cpufreq_notifier( + struct notifier_block *nb, + unsigned long val, void *data) +{ + struct _cpu_info *cpu = container_of(nb, + struct _cpu_info, cpu_nb); + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + spin_lock(&cpu->lock); + if (freq->cpu < num_possible_cpus()) + cpu->curr_freq[freq->cpu] = freq->new / 1000; + spin_unlock(&cpu->lock); + + return 0; +} + +static void idlestats_get_sample(struct msm_idle_stats_device *idledev, + struct msm_idle_pulse *pulse) +{ + struct kgsl_power_stats stats; + struct idlestats_priv *priv = container_of(idledev, + struct idlestats_priv, idledev); + struct kgsl_device *device = priv->device; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + mutex_lock(&device->mutex); + /* If the GPU is asleep, don't wake it up - assume that we + are idle */ + + if (!(device->state & (KGSL_STATE_SLEEP | KGSL_STATE_NAP))) { + device->ftbl->power_stats(device, &stats); + pulse->busy_start_time = pwr->time - stats.busy_time; + pulse->busy_interval = stats.busy_time; + } else { + pulse->busy_start_time = pwr->time; + pulse->busy_interval = 0; + } + pulse->wait_interval = 0; + mutex_unlock(&device->mutex); +} + +static void idlestats_busy(struct kgsl_device *device, + 
struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + int i, busy, nr_cpu = 1; + + if (priv->pulse.busy_start_time != 0) { + priv->pulse.wait_interval = 0; + /* Calculate the total CPU busy time for this GPU pulse */ + for (i = 0; i < num_possible_cpus(); i++) { + spin_lock(&priv->cpu_info.lock); + if (cpu_online(i)) { + priv->cpu_info.end[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + busy = priv->cpu_info.end[i] - + priv->cpu_info.start[i]; + /* Normalize the busy time by frequency */ + busy = priv->cpu_info.curr_freq[i] * + (busy / priv->cpu_info.max_freq[i]); + priv->pulse.wait_interval += busy; + nr_cpu++; + } + spin_unlock(&priv->cpu_info.lock); + } + priv->pulse.wait_interval /= nr_cpu; + msm_idle_stats_idle_end(&priv->idledev, &priv->pulse); + } + priv->pulse.busy_start_time = ktime_to_us(ktime_get()); +} + +static void idlestats_idle(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + int i, nr_cpu; + struct kgsl_power_stats stats; + struct idlestats_priv *priv = pwrscale->priv; + + /* This is called from within a mutex protected function, so + no additional locking required */ + device->ftbl->power_stats(device, &stats); + + /* If total_time is zero, then we don't have + any interesting statistics to store */ + if (stats.total_time == 0) { + priv->pulse.busy_start_time = 0; + return; + } + + priv->pulse.busy_interval = stats.busy_time; + nr_cpu = num_possible_cpus(); + for (i = 0; i < nr_cpu; i++) + if (cpu_online(i)) + priv->cpu_info.start[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + + msm_idle_stats_idle_start(&priv->idledev); +} + +static void idlestats_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + priv->idledev.stats->event |= MSM_IDLE_STATS_EVENT_IDLE_TIMER_EXPIRED; +} + +static int idlestats_init(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv; + struct cpufreq_policy cpu_policy; + int ret, i; + + priv = pwrscale->priv = kzalloc(sizeof(struct idlestats_priv), + GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + snprintf(priv->name, sizeof(priv->name), "idle_stats_%s", + device->name); + + priv->device = device; + + priv->idledev.name = (const char *) priv->name; + priv->idledev.get_sample = idlestats_get_sample; + + spin_lock_init(&priv->cpu_info.lock); + priv->cpu_info.cpu_nb.notifier_call = + idlestats_cpufreq_notifier; + ret = cpufreq_register_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret) + goto err; + for (i = 0; i < num_possible_cpus(); i++) { + cpufreq_frequency_table_cpuinfo(&cpu_policy, + cpufreq_frequency_get_table(i)); + priv->cpu_info.max_freq[i] = cpu_policy.max / 1000; + priv->cpu_info.curr_freq[i] = cpu_policy.max / 1000; + } + ret = msm_idle_stats_register_device(&priv->idledev); +err: + if (ret) { + kfree(pwrscale->priv); + pwrscale->priv = NULL; + } + + return ret; +} + +static void idlestats_close(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + + if (pwrscale->priv == NULL) + return; + + cpufreq_unregister_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + msm_idle_stats_deregister_device(&priv->idledev); + + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_idlestats = { + .name = "idlestats", + .init = idlestats_init, + .idle = idlestats_idle, + .busy = 
idlestats_busy, + .sleep = idlestats_sleep, + .close = idlestats_close +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c new file mode 100644 index 00000000..f3e84e45 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c @@ -0,0 +1,197 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define TZ_GOVERNOR_PERFORMANCE 0 +#define TZ_GOVERNOR_ONDEMAND 1 + +struct tz_priv { + int governor; + unsigned int no_switch_cnt; + unsigned int skip_cnt; +}; + +#define SWITCH_OFF 200 +#define SWITCH_OFF_RESET_TH 40 +#define SKIP_COUNTER 500 +#define TZ_RESET_ID 0x3 +#define TZ_UPDATE_ID 0x4 + +#ifdef CONFIG_MSM_SCM +/* Trap into the TrustZone, and call funcs there. */ +static int __secure_tz_entry(u32 cmd, u32 val) +{ + __iowmb(); + return scm_call_atomic1(SCM_SVC_IO, cmd, val); +} +#else +static int __secure_tz_entry(u32 cmd, u32 val) +{ + return 0; +} +#endif /* CONFIG_MSM_SCM */ + +static ssize_t tz_governor_show(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + char *buf) +{ + struct tz_priv *priv = pwrscale->priv; + int ret; + + if (priv->governor == TZ_GOVERNOR_ONDEMAND) + ret = snprintf(buf, 10, "ondemand\n"); + else + ret = snprintf(buf, 13, "performance\n"); + + return ret; +} + +static ssize_t tz_governor_store(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + const char *buf, size_t count) +{ + char str[20]; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = sscanf(buf, "%20s", str); + if (ret != 1) + return -EINVAL; + + mutex_lock(&device->mutex); + + if (!strncmp(str, "ondemand", 8)) + priv->governor = TZ_GOVERNOR_ONDEMAND; + else if (!strncmp(str, "performance", 11)) + priv->governor = TZ_GOVERNOR_PERFORMANCE; + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel); + + mutex_unlock(&device->mutex); + return count; +} + +PWRSCALE_POLICY_ATTR(governor, 0644, tz_governor_show, tz_governor_store); + +static struct attribute *tz_attrs[] = { + &policy_attr_governor.attr, + NULL +}; + +static struct attribute_group tz_attr_group = { + .attrs = tz_attrs, +}; + +static void tz_wake(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + if (device->state != KGSL_STATE_NAP && + priv->governor == TZ_GOVERNOR_ONDEMAND) + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.thermal_pwrlevel); +} + +static void tz_idle(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_power_stats stats; + int val; + + /* In "performance" mode the clock speed always stays + the same */ + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + return; + + device->ftbl->power_stats(device, &stats); + if (stats.total_time == 0) + return; + + /* If the 
GPU has stayed in turbo mode for a while, * + * stop writing out values. */ + if (pwr->active_pwrlevel == 0) { + if (priv->no_switch_cnt > SWITCH_OFF) { + priv->skip_cnt++; + if (priv->skip_cnt > SKIP_COUNTER) { + priv->no_switch_cnt -= SWITCH_OFF_RESET_TH; + priv->skip_cnt = 0; + } + return; + } + priv->no_switch_cnt++; + } else { + priv->no_switch_cnt = 0; + } + + val = __secure_tz_entry(TZ_UPDATE_ID, + stats.total_time - stats.busy_time); + if (val) + kgsl_pwrctrl_pwrlevel_change(device, + pwr->active_pwrlevel + val); +} + +static void tz_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + + __secure_tz_entry(TZ_RESET_ID, 0); + priv->no_switch_cnt = 0; +} + +static int tz_init(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv; + + /* Trustzone is only valid for some SOCs */ + if (!(cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930())) + return -EINVAL; + + priv = pwrscale->priv = kzalloc(sizeof(struct tz_priv), GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + priv->governor = TZ_GOVERNOR_ONDEMAND; + kgsl_pwrscale_policy_add_files(device, pwrscale, &tz_attr_group); + + return 0; +} + +static void tz_close(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + kgsl_pwrscale_policy_remove_files(device, pwrscale, &tz_attr_group); + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_tz = { + .name = "trustzone", + .init = tz_init, + .idle = tz_idle, + .sleep = tz_sleep, + .wake = tz_wake, + .close = tz_close +}; +EXPORT_SYMBOL(kgsl_pwrscale_policy_tz); diff --git a/include/drm/kgsl_drm.h b/include/drm/kgsl_drm.h new file mode 100644 index 00000000..934bdf3f --- /dev/null +++ b/include/drm/kgsl_drm.h @@ -0,0 +1,221 @@ +/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of Code Aurora Forum, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#ifndef _KGSL_DRM_H_
+#define _KGSL_DRM_H_
+
+#include "drm.h"
+
+#define DRM_KGSL_GEM_CREATE 0x00
+#define DRM_KGSL_GEM_PREP 0x01
+#define DRM_KGSL_GEM_SETMEMTYPE 0x02
+#define DRM_KGSL_GEM_GETMEMTYPE 0x03
+#define DRM_KGSL_GEM_MMAP 0x04
+#define DRM_KGSL_GEM_ALLOC 0x05
+#define DRM_KGSL_GEM_BIND_GPU 0x06
+#define DRM_KGSL_GEM_UNBIND_GPU 0x07
+
+#define DRM_KGSL_GEM_GET_BUFINFO 0x08
+#define DRM_KGSL_GEM_SET_BUFCOUNT 0x09
+#define DRM_KGSL_GEM_SET_ACTIVE 0x0A
+#define DRM_KGSL_GEM_LOCK_HANDLE 0x0B
+#define DRM_KGSL_GEM_UNLOCK_HANDLE 0x0C
+#define DRM_KGSL_GEM_UNLOCK_ON_TS 0x0D
+#define DRM_KGSL_GEM_CREATE_FD 0x0E
+
+#define DRM_IOCTL_KGSL_GEM_CREATE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE, struct drm_kgsl_gem_create)
+
+#define DRM_IOCTL_KGSL_GEM_PREP \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_PREP, struct drm_kgsl_gem_prep)
+
+#define DRM_IOCTL_KGSL_GEM_SETMEMTYPE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SETMEMTYPE, \
+struct drm_kgsl_gem_memtype)
+
+#define DRM_IOCTL_KGSL_GEM_GETMEMTYPE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GETMEMTYPE, \
+struct drm_kgsl_gem_memtype)
+
+#define DRM_IOCTL_KGSL_GEM_MMAP \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_MMAP, struct drm_kgsl_gem_mmap)
+
+#define DRM_IOCTL_KGSL_GEM_ALLOC \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_ALLOC, struct drm_kgsl_gem_alloc)
+
+#define DRM_IOCTL_KGSL_GEM_BIND_GPU \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_BIND_GPU, struct drm_kgsl_gem_bind_gpu)
+
+#define DRM_IOCTL_KGSL_GEM_UNBIND_GPU \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNBIND_GPU, \
+struct drm_kgsl_gem_bind_gpu)
+
+#define DRM_IOCTL_KGSL_GEM_GET_BUFINFO \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GET_BUFINFO, \
+ struct drm_kgsl_gem_bufinfo)
+
+#define DRM_IOCTL_KGSL_GEM_SET_BUFCOUNT \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_BUFCOUNT, \
+ struct drm_kgsl_gem_bufcount)
+
+#define DRM_IOCTL_KGSL_GEM_SET_ACTIVE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_ACTIVE, \
+ struct drm_kgsl_gem_active)
+
+#define DRM_IOCTL_KGSL_GEM_LOCK_HANDLE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_LOCK_HANDLE, \
+struct drm_kgsl_gem_lock_handles)
+
+#define DRM_IOCTL_KGSL_GEM_UNLOCK_HANDLE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_HANDLE, \
+struct drm_kgsl_gem_unlock_handles)
+
+#define DRM_IOCTL_KGSL_GEM_UNLOCK_ON_TS \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_ON_TS, \
+struct drm_kgsl_gem_unlock_on_ts)
+
+#define DRM_IOCTL_KGSL_GEM_CREATE_FD \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE_FD, \
+struct drm_kgsl_gem_create_fd)
+
+/* Maximum number of sub buffers per GEM object */
+#define DRM_KGSL_GEM_MAX_BUFFERS 2
+
+/* Memory types - these define the source and caching policies
+   of the GEM memory chunk */
+
+/* Legacy definitions left for compatibility */
+
+#define DRM_KGSL_GEM_TYPE_EBI 0
+#define DRM_KGSL_GEM_TYPE_SMI 1
+#define DRM_KGSL_GEM_TYPE_KMEM 2
+#define DRM_KGSL_GEM_TYPE_KMEM_NOCACHE 3
+#define DRM_KGSL_GEM_TYPE_MEM_MASK 0xF
+
+/* Contiguous memory (PMEM) */
+#define DRM_KGSL_GEM_TYPE_PMEM 0x000100
+
+/* PMEM memory types */
+#define DRM_KGSL_GEM_PMEM_EBI 0x001000
+#define DRM_KGSL_GEM_PMEM_SMI 0x002000
+
+/* Standard paged memory */
+#define DRM_KGSL_GEM_TYPE_MEM 0x010000
+
+/* Caching controls */
+#define DRM_KGSL_GEM_CACHE_NONE 0x000000
+#define DRM_KGSL_GEM_CACHE_WCOMBINE 0x100000
+#define DRM_KGSL_GEM_CACHE_WTHROUGH 0x200000
+#define DRM_KGSL_GEM_CACHE_WBACK 0x400000
+#define DRM_KGSL_GEM_CACHE_WBACKWA 0x800000
+#define DRM_KGSL_GEM_CACHE_MASK 0xF00000
+
+/* FD based objects */
+#define DRM_KGSL_GEM_TYPE_FD_FBMEM 0x1000000
+#define DRM_KGSL_GEM_TYPE_FD_MASK 0xF000000
+
+/* Timestamp types */
+#define DRM_KGSL_GEM_TS_3D 0x00000430
+#define DRM_KGSL_GEM_TS_2D 0x00000180
+
+
+struct drm_kgsl_gem_create {
+	uint32_t size;
+	uint32_t handle;
+};
+
+struct drm_kgsl_gem_prep {
+	uint32_t handle;
+	uint32_t phys;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_memtype {
+	uint32_t handle;
+	uint32_t type;
+};
+
+struct drm_kgsl_gem_mmap {
+	uint32_t handle;
+	uint32_t size;
+	uint32_t hostptr;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_alloc {
+	uint32_t handle;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_bind_gpu {
+	uint32_t handle;
+	uint32_t gpuptr;
+};
+
+struct drm_kgsl_gem_bufinfo {
+	uint32_t handle;
+	uint32_t count;
+	uint32_t active;
+	uint32_t offset[DRM_KGSL_GEM_MAX_BUFFERS];
+	uint32_t gpuaddr[DRM_KGSL_GEM_MAX_BUFFERS];
+};
+
+struct drm_kgsl_gem_bufcount {
+	uint32_t handle;
+	uint32_t bufcount;
+};
+
+struct drm_kgsl_gem_active {
+	uint32_t handle;
+	uint32_t active;
+};
+
+struct drm_kgsl_gem_lock_handles {
+	uint32_t num_handles;
+	uint32_t *handle_list;
+	uint32_t pid;
+	uint32_t lock_id; /* Returned lock id used for unlocking */
+};
+
+struct drm_kgsl_gem_unlock_handles {
+	uint32_t lock_id;
+};
+
+struct drm_kgsl_gem_unlock_on_ts {
+	uint32_t lock_id;
+	uint32_t timestamp; /* This field is a hw generated ts */
+	uint32_t type; /* Which pipe to check for ts generation */
+};
+
+struct drm_kgsl_gem_create_fd {
+	uint32_t fd;
+	uint32_t handle;
+};
+
+#endif
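
The ioctl numbers, flag bits, and argument structures above describe the whole KGSL GEM userspace interface, so a short host-side example helps show how they compose. The sketch below is not part of the patch: the /dev/dri/card0 node, the 4 KB size, and the create / set-memtype / allocate flow are illustrative assumptions; only the DRM_IOCTL_KGSL_GEM_* requests, the flag values, and the struct layouts come from kgsl_drm.h.

/*
 * Hypothetical userspace sketch: allocate a GEM object through the KGSL
 * DRM interface, mark it as paged memory with write-combined caching,
 * and ask for its mmap offset.  Device node path and size are assumed.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/kgsl_drm.h>

int main(void)
{
	struct drm_kgsl_gem_create create = { .size = 4096 };
	struct drm_kgsl_gem_memtype memtype;
	struct drm_kgsl_gem_alloc alloc;
	int fd, ret = 1;

	fd = open("/dev/dri/card0", O_RDWR);	/* assumed DRM node */
	if (fd < 0)
		return 1;

	/* Create the object, then pick ordinary paged, write-combined memory. */
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_CREATE, &create))
		goto out;

	memtype.handle = create.handle;
	memtype.type = DRM_KGSL_GEM_TYPE_MEM | DRM_KGSL_GEM_CACHE_WCOMBINE;
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_SETMEMTYPE, &memtype))
		goto out;

	/* Back the object with memory and fetch its mmap offset. */
	alloc.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_ALLOC, &alloc))
		goto out;

	printf("handle %u, mmap offset 0x%llx\n", create.handle,
	       (unsigned long long)alloc.offset);
	ret = 0;
out:
	close(fd);
	return ret;
}

Mapping the buffer afterwards would use the returned offset with an ordinary mmap() on the same file descriptor, in the usual DRM fashion.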
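
Looking back at kgsl_pwrscale_trustzone.c earlier in this patch, the counter handling in tz_idle() is the subtlest part: once the GPU has sat at the top power level for more than SWITCH_OFF consecutive idle ticks, the policy stops calling into the secure world and only re-checks every SKIP_COUNTER ticks. The stand-alone mock below is an illustration only, not driver code; the kernel and SCM calls are stubbed out and only that counter logic is kept, so the behaviour can be traced on a host machine with the same SWITCH_OFF, SWITCH_OFF_RESET_TH, and SKIP_COUNTER values.

/* Host-side mock of the tz_idle() hysteresis, for illustration only. */
#include <stdio.h>

#define SWITCH_OFF		200
#define SWITCH_OFF_RESET_TH	40
#define SKIP_COUNTER		500

static unsigned int no_switch_cnt, skip_cnt;

/* Returns nonzero when the policy would actually consult TrustZone. */
static int tz_idle_tick(int at_top_level)
{
	if (at_top_level) {
		if (no_switch_cnt > SWITCH_OFF) {
			/* Throttled: only re-enable the check occasionally. */
			if (++skip_cnt > SKIP_COUNTER) {
				no_switch_cnt -= SWITCH_OFF_RESET_TH;
				skip_cnt = 0;
			}
			return 0;
		}
		no_switch_cnt++;
	} else {
		no_switch_cnt = 0;
	}
	return 1;	/* stands in for __secure_tz_entry(TZ_UPDATE_ID, idle) */
}

int main(void)
{
	int i, calls = 0;

	/* Simulate 1000 idle ticks with the GPU pinned at pwrlevel 0. */
	for (i = 0; i < 1000; i++)
		calls += tz_idle_tick(1);

	printf("TrustZone consulted on %d of 1000 ticks\n", calls);
	return 0;
}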