From 9bb78d2a7fd55bc5e087dd7da0bfe354b86c3dbf Mon Sep 17 00:00:00 2001
From: Shantanu Gupta
Date: Mon, 14 May 2012 02:47:02 +0530
Subject: [PATCH] [KGSL] add missing files for last commit

---
 arch/arm/include/asm/asm-offsets.h            |    1 +
 arch/arm/include/asm/outercache.h             |   75 +
 .../include/mach/internal_power_rail.h        |   63 +
 arch/arm/mach-msm/include/mach/msm_memtypes.h |   64 +
 drivers/gpu/msm/a2xx_reg.h                    |  418 +++++
 drivers/gpu/msm/adreno_a2xx.c                 | 1607 +++++++++++++++++
 drivers/gpu/msm/kgsl_gpummu.c                 |  766 ++++++++
 drivers/gpu/msm/kgsl_gpummu.h                 |   85 +
 drivers/gpu/msm/kgsl_iommu.c                  |  333 ++++
 drivers/gpu/msm/kgsl_pwrscale_idlestats.c     |  221 +++
 drivers/gpu/msm/kgsl_pwrscale_trustzone.c     |  197 ++
 include/drm/kgsl_drm.h                        |  221 +++
 12 files changed, 4051 insertions(+)
 create mode 100644 arch/arm/include/asm/asm-offsets.h
 create mode 100644 arch/arm/include/asm/outercache.h
 create mode 100644 arch/arm/mach-msm/include/mach/internal_power_rail.h
 create mode 100644 arch/arm/mach-msm/include/mach/msm_memtypes.h
 create mode 100644 drivers/gpu/msm/a2xx_reg.h
 create mode 100644 drivers/gpu/msm/adreno_a2xx.c
 create mode 100644 drivers/gpu/msm/kgsl_gpummu.c
 create mode 100644 drivers/gpu/msm/kgsl_gpummu.h
 create mode 100644 drivers/gpu/msm/kgsl_iommu.c
 create mode 100644 drivers/gpu/msm/kgsl_pwrscale_idlestats.c
 create mode 100644 drivers/gpu/msm/kgsl_pwrscale_trustzone.c
 create mode 100644 include/drm/kgsl_drm.h

diff --git a/arch/arm/include/asm/asm-offsets.h b/arch/arm/include/asm/asm-offsets.h
new file mode 100644
index 00000000..d370ee36
--- /dev/null
+++ b/arch/arm/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
new file mode 100644
index 00000000..25f76bae
--- /dev/null
+++ b/arch/arm/include/asm/outercache.h
@@ -0,0 +1,75 @@
+/*
+ * arch/arm/include/asm/outercache.h
+ *
+ * Copyright (C) 2010 ARM Ltd.
+ * Written by Catalin Marinas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_OUTERCACHE_H
+#define __ASM_OUTERCACHE_H
+
+struct outer_cache_fns {
+	void (*inv_range)(unsigned long, unsigned long);
+	void (*clean_range)(unsigned long, unsigned long);
+	void (*flush_range)(unsigned long, unsigned long);
+#ifdef CONFIG_OUTER_CACHE_SYNC
+	void (*sync)(void);
+#endif
+};
+
+#ifdef CONFIG_OUTER_CACHE
+
+extern struct outer_cache_fns outer_cache;
+
+static inline void outer_inv_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.inv_range)
+		outer_cache.inv_range(start, end);
+}
+static inline void outer_clean_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.clean_range)
+		outer_cache.clean_range(start, end);
+}
+static inline void outer_flush_range(unsigned long start, unsigned long end)
+{
+	if (outer_cache.flush_range)
+		outer_cache.flush_range(start, end);
+}
+
+#else
+
+static inline void outer_inv_range(unsigned long start, unsigned long end)
+{ }
+static inline void outer_clean_range(unsigned long start, unsigned long end)
+{ }
+static inline void outer_flush_range(unsigned long start, unsigned long end)
+{ }
+
+#endif
+
+#ifdef CONFIG_OUTER_CACHE_SYNC
+static inline void outer_sync(void)
+{
+	if (outer_cache.sync)
+		outer_cache.sync();
+}
+#else
+static inline void outer_sync(void)
+{ }
+#endif
+
+#endif /* __ASM_OUTERCACHE_H */
diff --git a/arch/arm/mach-msm/include/mach/internal_power_rail.h b/arch/arm/mach-msm/include/mach/internal_power_rail.h
new file mode 100644
index 00000000..f489dc57
--- /dev/null
+++ b/arch/arm/mach-msm/include/mach/internal_power_rail.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2009, Code Aurora Forum. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *     * Neither the name of Code Aurora Forum, Inc. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef _INTERNAL_POWER_RAIL_H +#define _INTERNAL_POWER_RAIL_H + +/* Clock power rail IDs */ +#define PWR_RAIL_GRP_CLK 8 +#define PWR_RAIL_GRP_2D_CLK 58 +#define PWR_RAIL_MDP_CLK 14 +#define PWR_RAIL_MFC_CLK 68 +#define PWR_RAIL_ROTATOR_CLK 90 +#define PWR_RAIL_VDC_CLK 39 +#define PWR_RAIL_VFE_CLK 41 +#define PWR_RAIL_VPE_CLK 76 + +enum rail_ctl_mode { + PWR_RAIL_CTL_AUTO = 0, + PWR_RAIL_CTL_MANUAL, +}; + +static inline int __maybe_unused internal_pwr_rail_ctl(unsigned rail_id, + bool enable) +{ + /* Not yet implemented. */ + return 0; +} +static inline int __maybe_unused internal_pwr_rail_mode(unsigned rail_id, + enum rail_ctl_mode mode) +{ + /* Not yet implemented. */ + return 0; +} + +int internal_pwr_rail_ctl_auto(unsigned rail_id, bool enable); + +#endif /* _INTERNAL_POWER_RAIL_H */ + diff --git a/arch/arm/mach-msm/include/mach/msm_memtypes.h b/arch/arm/mach-msm/include/mach/msm_memtypes.h new file mode 100644 index 00000000..963f25c1 --- /dev/null +++ b/arch/arm/mach-msm/include/mach/msm_memtypes.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +/* The MSM Hardware supports multiple flavors of physical memory. + * This file captures hardware specific information of these types. +*/ + +#ifndef __ASM_ARCH_MSM_MEMTYPES_H +#define __ASM_ARCH_MSM_MEMTYPES_H + +#include +#include +int __init meminfo_init(unsigned int, unsigned int); +/* Redundant check to prevent this from being included outside of 7x30 */ +#if defined(CONFIG_ARCH_MSM7X30) +unsigned int get_num_populated_chipselects(void); +#endif + +unsigned int get_num_memory_banks(void); +unsigned int get_memory_bank_size(unsigned int); +unsigned int get_memory_bank_start(unsigned int); +int soc_change_memory_power(u64, u64, int); + +enum { + MEMTYPE_NONE = -1, + MEMTYPE_SMI_KERNEL = 0, + MEMTYPE_SMI, + MEMTYPE_EBI0, + MEMTYPE_EBI1, + MEMTYPE_MAX, +}; + +void msm_reserve(void); + +#define MEMTYPE_FLAGS_FIXED 0x1 +#define MEMTYPE_FLAGS_1M_ALIGN 0x2 + +struct memtype_reserve { + unsigned long start; + unsigned long size; + unsigned long limit; + int flags; +}; + +struct reserve_info { + struct memtype_reserve *memtype_reserve_table; + void (*calculate_reserve_sizes)(void); + int (*paddr_to_memtype)(unsigned int); + unsigned long low_unstable_address; + unsigned long max_unstable_size; + unsigned long bank_size; +}; + +extern struct reserve_info *reserve_info; +#endif diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h new file mode 100644 index 00000000..d859d61c --- /dev/null +++ b/drivers/gpu/msm/a2xx_reg.h @@ -0,0 +1,418 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ +#ifndef __A200_REG_H +#define __A200_REG_H + +enum VGT_EVENT_TYPE { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, +}; + +enum COLORFORMATX { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 = 13, + COLORX_8_8_8 = 14, +}; + +enum SURFACEFORMAT { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_10_11_11 = 16, + FMT_11_11_10 = 17, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_24_8 = 22, + FMT_24_8_FLOAT = 23, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_32_AS_8 = 39, + FMT_32_AS_8_8 = 40, + FMT_16_MPEG = 41, + FMT_16_16_MPEG = 42, + FMT_8_INTERLACED = 43, + FMT_32_AS_8_INTERLACED = 44, + FMT_32_AS_8_8_INTERLACED = 45, + FMT_16_INTERLACED = 46, + FMT_16_MPEG_INTERLACED = 47, + FMT_16_16_MPEG_INTERLACED = 48, + FMT_DXN = 49, + FMT_8_8_8_8_AS_16_16_16_16 = 50, + FMT_DXT1_AS_16_16_16_16 = 51, + FMT_DXT2_3_AS_16_16_16_16 = 52, + FMT_DXT4_5_AS_16_16_16_16 = 53, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_11_11_AS_16_16_16_16 = 55, + FMT_11_11_10_AS_16_16_16_16 = 56, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, + FMT_DXT3A_AS_1_1_1_1 = 61 +}; + +#define REG_PERF_MODE_CNT 0x0 +#define REG_PERF_STATE_RESET 0x0 +#define REG_PERF_STATE_ENABLE 0x1 +#define REG_PERF_STATE_FREEZE 0x2 + +#define RB_EDRAM_INFO_EDRAM_SIZE_SIZE 4 +#define RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE 2 +#define RB_EDRAM_INFO_UNUSED0_SIZE 8 +#define RB_EDRAM_INFO_EDRAM_RANGE_SIZE 18 + +struct rb_edram_info_t { + unsigned int edram_size:RB_EDRAM_INFO_EDRAM_SIZE_SIZE; + unsigned int edram_mapping_mode:RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE; + unsigned int unused0:RB_EDRAM_INFO_UNUSED0_SIZE; + unsigned int edram_range:RB_EDRAM_INFO_EDRAM_RANGE_SIZE; +}; + +union reg_rb_edram_info { + unsigned int val; + struct rb_edram_info_t f; +}; + +#define RBBM_READ_ERROR_UNUSED0_SIZE 2 +#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 +#define RBBM_READ_ERROR_UNUSED1_SIZE 13 +#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 +#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 + +struct rbbm_read_error_t { + unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; + unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; + unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; + unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; + unsigned int 
read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; +}; + +union rbbm_read_error_u { + unsigned int val:32; + struct rbbm_read_error_t f; +}; + +#define CP_RB_CNTL_RB_BUFSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED0_SIZE 2 +#define CP_RB_CNTL_RB_BLKSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED1_SIZE 2 +#define CP_RB_CNTL_BUF_SWAP_SIZE 2 +#define CP_RB_CNTL_UNUSED2_SIZE 2 +#define CP_RB_CNTL_RB_POLL_EN_SIZE 1 +#define CP_RB_CNTL_UNUSED3_SIZE 6 +#define CP_RB_CNTL_RB_NO_UPDATE_SIZE 1 +#define CP_RB_CNTL_UNUSED4_SIZE 3 +#define CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE 1 + +struct cp_rb_cntl_t { + unsigned int rb_bufsz:CP_RB_CNTL_RB_BUFSZ_SIZE; + unsigned int unused0:CP_RB_CNTL_UNUSED0_SIZE; + unsigned int rb_blksz:CP_RB_CNTL_RB_BLKSZ_SIZE; + unsigned int unused1:CP_RB_CNTL_UNUSED1_SIZE; + unsigned int buf_swap:CP_RB_CNTL_BUF_SWAP_SIZE; + unsigned int unused2:CP_RB_CNTL_UNUSED2_SIZE; + unsigned int rb_poll_en:CP_RB_CNTL_RB_POLL_EN_SIZE; + unsigned int unused3:CP_RB_CNTL_UNUSED3_SIZE; + unsigned int rb_no_update:CP_RB_CNTL_RB_NO_UPDATE_SIZE; + unsigned int unused4:CP_RB_CNTL_UNUSED4_SIZE; + unsigned int rb_rptr_wr_ena:CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE; +}; + +union reg_cp_rb_cntl { + unsigned int val:32; + struct cp_rb_cntl_t f; +}; + +#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL +#define RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT 0x00000004 + + +#define SQ_INT_CNTL__PS_WATCHDOG_MASK 0x00000001L +#define SQ_INT_CNTL__VS_WATCHDOG_MASK 0x00000002L + +#define RBBM_INT_CNTL__RDERR_INT_MASK 0x00000001L +#define RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK 0x00000002L +#define RBBM_INT_CNTL__GUI_IDLE_INT_MASK 0x00080000L + +#define RBBM_STATUS__CMDFIFO_AVAIL_MASK 0x0000001fL +#define RBBM_STATUS__TC_BUSY_MASK 0x00000020L +#define RBBM_STATUS__HIRQ_PENDING_MASK 0x00000100L +#define RBBM_STATUS__CPRQ_PENDING_MASK 0x00000200L +#define RBBM_STATUS__CFRQ_PENDING_MASK 0x00000400L +#define RBBM_STATUS__PFRQ_PENDING_MASK 0x00000800L +#define RBBM_STATUS__VGT_BUSY_NO_DMA_MASK 0x00001000L +#define RBBM_STATUS__RBBM_WU_BUSY_MASK 0x00004000L +#define RBBM_STATUS__CP_NRT_BUSY_MASK 0x00010000L +#define RBBM_STATUS__MH_BUSY_MASK 0x00040000L +#define RBBM_STATUS__MH_COHERENCY_BUSY_MASK 0x00080000L +#define RBBM_STATUS__SX_BUSY_MASK 0x00200000L +#define RBBM_STATUS__TPC_BUSY_MASK 0x00400000L +#define RBBM_STATUS__SC_CNTX_BUSY_MASK 0x01000000L +#define RBBM_STATUS__PA_BUSY_MASK 0x02000000L +#define RBBM_STATUS__VGT_BUSY_MASK 0x04000000L +#define RBBM_STATUS__SQ_CNTX17_BUSY_MASK 0x08000000L +#define RBBM_STATUS__SQ_CNTX0_BUSY_MASK 0x10000000L +#define RBBM_STATUS__RB_CNTX_BUSY_MASK 0x40000000L +#define RBBM_STATUS__GUI_ACTIVE_MASK 0x80000000L + +#define CP_INT_CNTL__SW_INT_MASK 0x00080000L +#define CP_INT_CNTL__T0_PACKET_IN_IB_MASK 0x00800000L +#define CP_INT_CNTL__OPCODE_ERROR_MASK 0x01000000L +#define CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK 0x02000000L +#define CP_INT_CNTL__RESERVED_BIT_ERROR_MASK 0x04000000L +#define CP_INT_CNTL__IB_ERROR_MASK 0x08000000L +#define CP_INT_CNTL__IB2_INT_MASK 0x20000000L +#define CP_INT_CNTL__IB1_INT_MASK 0x40000000L +#define CP_INT_CNTL__RB_INT_MASK 0x80000000L + +#define MASTER_INT_SIGNAL__MH_INT_STAT 0x00000020L +#define MASTER_INT_SIGNAL__SQ_INT_STAT 0x04000000L +#define MASTER_INT_SIGNAL__CP_INT_STAT 0x40000000L +#define MASTER_INT_SIGNAL__RBBM_INT_STAT 0x80000000L + +#define RB_EDRAM_INFO__EDRAM_SIZE_MASK 0x0000000fL +#define RB_EDRAM_INFO__EDRAM_RANGE_MASK 0xffffc000L + +#define MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT 0x00000006 +#define MH_ARBITER_CONFIG__L1_ARB_ENABLE__SHIFT 0x00000007 +#define 
MH_ARBITER_CONFIG__L1_ARB_HOLD_ENABLE__SHIFT 0x00000008 +#define MH_ARBITER_CONFIG__L2_ARB_CONTROL__SHIFT 0x00000009 +#define MH_ARBITER_CONFIG__PAGE_SIZE__SHIFT 0x0000000a +#define MH_ARBITER_CONFIG__TC_REORDER_ENABLE__SHIFT 0x0000000d +#define MH_ARBITER_CONFIG__TC_ARB_HOLD_ENABLE__SHIFT 0x0000000e +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT_ENABLE__SHIFT 0x0000000f +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT__SHIFT 0x00000010 +#define MH_ARBITER_CONFIG__CP_CLNT_ENABLE__SHIFT 0x00000016 +#define MH_ARBITER_CONFIG__VGT_CLNT_ENABLE__SHIFT 0x00000017 +#define MH_ARBITER_CONFIG__TC_CLNT_ENABLE__SHIFT 0x00000018 +#define MH_ARBITER_CONFIG__RB_CLNT_ENABLE__SHIFT 0x00000019 +#define MH_ARBITER_CONFIG__PA_CLNT_ENABLE__SHIFT 0x0000001a + +#define CP_RB_CNTL__RB_BUFSZ__SHIFT 0x00000000 +#define CP_RB_CNTL__RB_BLKSZ__SHIFT 0x00000008 +#define CP_RB_CNTL__RB_POLL_EN__SHIFT 0x00000014 +#define CP_RB_CNTL__RB_NO_UPDATE__SHIFT 0x0000001b + +#define RB_COLOR_INFO__COLOR_FORMAT__SHIFT 0x00000000 +#define RB_EDRAM_INFO__EDRAM_MAPPING_MODE__SHIFT 0x00000004 +#define RB_EDRAM_INFO__EDRAM_RANGE__SHIFT 0x0000000e + +#define REG_CP_CSQ_IB1_STAT 0x01FE +#define REG_CP_CSQ_IB2_STAT 0x01FF +#define REG_CP_CSQ_RB_STAT 0x01FD +#define REG_CP_DEBUG 0x01FC +#define REG_CP_IB1_BASE 0x0458 +#define REG_CP_IB1_BUFSZ 0x0459 +#define REG_CP_IB2_BASE 0x045A +#define REG_CP_IB2_BUFSZ 0x045B +#define REG_CP_INT_ACK 0x01F4 +#define REG_CP_INT_CNTL 0x01F2 +#define REG_CP_INT_STATUS 0x01F3 +#define REG_CP_ME_CNTL 0x01F6 +#define REG_CP_ME_RAM_DATA 0x01FA +#define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_STATUS 0x01F7 +#define REG_CP_PFP_UCODE_ADDR 0x00C0 +#define REG_CP_PFP_UCODE_DATA 0x00C1 +#define REG_CP_QUEUE_THRESHOLDS 0x01D5 +#define REG_CP_RB_BASE 0x01C0 +#define REG_CP_RB_CNTL 0x01C1 +#define REG_CP_RB_RPTR 0x01C4 +#define REG_CP_RB_RPTR_ADDR 0x01C3 +#define REG_CP_RB_RPTR_WR 0x01C7 +#define REG_CP_RB_WPTR 0x01C5 +#define REG_CP_RB_WPTR_BASE 0x01C8 +#define REG_CP_RB_WPTR_DELAY 0x01C6 +#define REG_CP_STAT 0x047F +#define REG_CP_STATE_DEBUG_DATA 0x01ED +#define REG_CP_STATE_DEBUG_INDEX 0x01EC +#define REG_CP_ST_BASE 0x044D +#define REG_CP_ST_BUFSZ 0x044E + +#define REG_CP_PERFMON_CNTL 0x0444 +#define REG_CP_PERFCOUNTER_SELECT 0x0445 +#define REG_CP_PERFCOUNTER_LO 0x0446 +#define REG_CP_PERFCOUNTER_HI 0x0447 + +#define REG_RBBM_PERFCOUNTER1_SELECT 0x0395 +#define REG_RBBM_PERFCOUNTER1_HI 0x0398 +#define REG_RBBM_PERFCOUNTER1_LO 0x0397 + +#define REG_MASTER_INT_SIGNAL 0x03B7 + +#define REG_PA_CL_VPORT_XSCALE 0x210F +#define REG_PA_CL_VPORT_ZOFFSET 0x2114 +#define REG_PA_CL_VPORT_ZSCALE 0x2113 +#define REG_PA_CL_CLIP_CNTL 0x2204 +#define REG_PA_CL_VTE_CNTL 0x2206 +#define REG_PA_SC_AA_MASK 0x2312 +#define REG_PA_SC_LINE_CNTL 0x2300 +#define REG_PA_SC_SCREEN_SCISSOR_BR 0x200F +#define REG_PA_SC_SCREEN_SCISSOR_TL 0x200E +#define REG_PA_SC_VIZ_QUERY 0x2293 +#define REG_PA_SC_VIZ_QUERY_STATUS 0x0C44 +#define REG_PA_SC_WINDOW_OFFSET 0x2080 +#define REG_PA_SC_WINDOW_SCISSOR_BR 0x2082 +#define REG_PA_SC_WINDOW_SCISSOR_TL 0x2081 +#define REG_PA_SU_FACE_DATA 0x0C86 +#define REG_PA_SU_POINT_SIZE 0x2280 +#define REG_PA_SU_LINE_CNTL 0x2282 +#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET 0x2383 +#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE 0x2380 +#define REG_PA_SU_SC_MODE_CNTL 0x2205 + +#define REG_PC_INDEX_OFFSET 0x2102 + +#define REG_RBBM_CNTL 0x003B +#define REG_RBBM_INT_ACK 0x03B6 +#define REG_RBBM_INT_CNTL 0x03B4 +#define REG_RBBM_INT_STATUS 0x03B5 +#define REG_RBBM_PATCH_RELEASE 0x0001 +#define REG_RBBM_PERIPHID1 0x03F9 +#define 
REG_RBBM_PERIPHID2 0x03FA +#define REG_RBBM_DEBUG 0x039B +#define REG_RBBM_DEBUG_OUT 0x03A0 +#define REG_RBBM_DEBUG_CNTL 0x03A1 +#define REG_RBBM_PM_OVERRIDE1 0x039C +#define REG_RBBM_PM_OVERRIDE2 0x039D +#define REG_RBBM_READ_ERROR 0x03B3 +#define REG_RBBM_SOFT_RESET 0x003C +#define REG_RBBM_STATUS 0x05D0 + +#define REG_RB_COLORCONTROL 0x2202 +#define REG_RB_COLOR_DEST_MASK 0x2326 +#define REG_RB_COLOR_MASK 0x2104 +#define REG_RB_COPY_CONTROL 0x2318 +#define REG_RB_DEPTHCONTROL 0x2200 +#define REG_RB_EDRAM_INFO 0x0F02 +#define REG_RB_MODECONTROL 0x2208 +#define REG_RB_SURFACE_INFO 0x2000 +#define REG_RB_SAMPLE_POS 0x220a + +#define REG_SCRATCH_ADDR 0x01DD +#define REG_SCRATCH_REG0 0x0578 +#define REG_SCRATCH_REG2 0x057A +#define REG_SCRATCH_UMSK 0x01DC + +#define REG_SQ_CF_BOOLEANS 0x4900 +#define REG_SQ_CF_LOOP 0x4908 +#define REG_SQ_GPR_MANAGEMENT 0x0D00 +#define REG_SQ_FLOW_CONTROL 0x0D01 +#define REG_SQ_INST_STORE_MANAGMENT 0x0D02 +#define REG_SQ_INT_ACK 0x0D36 +#define REG_SQ_INT_CNTL 0x0D34 +#define REG_SQ_INT_STATUS 0x0D35 +#define REG_SQ_PROGRAM_CNTL 0x2180 +#define REG_SQ_PS_PROGRAM 0x21F6 +#define REG_SQ_VS_PROGRAM 0x21F7 +#define REG_SQ_WRAPPING_0 0x2183 +#define REG_SQ_WRAPPING_1 0x2184 + +#define REG_VGT_ENHANCE 0x2294 +#define REG_VGT_INDX_OFFSET 0x2102 +#define REG_VGT_MAX_VTX_INDX 0x2100 +#define REG_VGT_MIN_VTX_INDX 0x2101 + +#define REG_TP0_CHICKEN 0x0E1E +#define REG_TC_CNTL_STATUS 0x0E00 +#define REG_PA_SC_AA_CONFIG 0x2301 +#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL 0x2316 +#define REG_SQ_INTERPOLATOR_CNTL 0x2182 +#define REG_RB_DEPTH_INFO 0x2002 +#define REG_COHER_DEST_BASE_0 0x2006 +#define REG_RB_FOG_COLOR 0x2109 +#define REG_RB_STENCILREFMASK_BF 0x210C +#define REG_PA_SC_LINE_STIPPLE 0x2283 +#define REG_SQ_PS_CONST 0x2308 +#define REG_RB_DEPTH_CLEAR 0x231D +#define REG_RB_SAMPLE_COUNT_CTL 0x2324 +#define REG_SQ_CONSTANT_0 0x4000 +#define REG_SQ_FETCH_0 0x4800 + +#define REG_COHER_BASE_PM4 0xA2A +#define REG_COHER_STATUS_PM4 0xA2B +#define REG_COHER_SIZE_PM4 0xA29 + +/*registers added in adreno220*/ +#define REG_A220_PC_INDX_OFFSET REG_VGT_INDX_OFFSET +#define REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL REG_VGT_VERTEX_REUSE_BLOCK_CNTL +#define REG_A220_PC_MAX_VTX_INDX REG_VGT_MAX_VTX_INDX +#define REG_A220_RB_LRZ_VSC_CONTROL 0x2209 +#define REG_A220_GRAS_CONTROL 0x2210 +#define REG_A220_VSC_BIN_SIZE 0x0C01 +#define REG_A220_VSC_PIPE_DATA_LENGTH_7 0x0C1D + +/*registers added in adreno225*/ +#define REG_A225_RB_COLOR_INFO3 0x2005 +#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103 +#define REG_A225_GRAS_UCP0X 0x2340 +#define REG_A225_GRAS_UCP_ENABLED 0x2360 + +#endif /* __A200_REG_H */ diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c new file mode 100644 index 00000000..064b05e9 --- /dev/null +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -0,0 +1,1607 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "adreno.h" + +/* + * + * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers + * (34.5KB) + * + * +---------------------+------------+-------------+---+---------------------+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow | + * +---------------------+------------+-------------+---+---------------------+ + * ________________________________/ \____________________ + * / | + * +--------------+-----------+------+-----------+------------------------+ + * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused | + * +--------------+-----------+------+-----------+------------------------+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 4K - Command and Vertex Buffers + * - Indirect command buffer : Const/Reg restore + * - includes Loop & Bool const shadows + * - Indirect command buffer : Const/Reg save + * - Quad vertices & texture coordinates + * - Indirect command buffer : Gmem save + * - Indirect command buffer : Gmem restore + * - Unused (padding to 8KB boundary) + * <1K - Texture Constant Shadow (768 bytes) (8K aligned) + * 18K - Shader Instruction Shadow + * - 6K vertex (32 byte aligned) + * - 6K pixel (32 byte aligned) + * - 6K shared (32 byte aligned) + * + * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes + * 3 DWORDS per DWORD transfered, plus 1 DWORD for the shadow, for a total of + * 16 bytes per constant. If the texture constants were transfered this way, + * the Command & Vertex Buffers section would extend past the 16K boundary. + * By moving the texture constant shadow area to start at 16KB boundary, we + * only require approximately 40 bytes more memory, but are able to use the + * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up + * context switching. + * + * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool + * constants would require an additional 8KB each, for alignment.] 
+ * + */ + +/* Constants */ + +#define ALU_CONSTANTS 2048 /* DWORDS */ +#define NUM_REGISTERS 1024 /* DWORDS */ +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES +#define CMD_BUFFER_LEN 9216 /* DWORDS */ +#else +#define CMD_BUFFER_LEN 3072 /* DWORDS */ +#endif +#define TEX_CONSTANTS (32*6) /* DWORDS */ +#define BOOL_CONSTANTS 8 /* DWORDS */ +#define LOOP_CONSTANTS 56 /* DWORDS */ +#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ + +/* 96-bit instructions */ +#define SHADER_INSTRUCT (1< sys) */ + +/* pre-compiled vertex shader program +* +* attribute vec4 P; +* void main(void) +* { +* gl_Position = P; +* } +*/ +#define GMEM2SYS_VTX_PGM_LEN 0x12 + +static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = { + 0x00011003, 0x00001000, 0xc2000000, + 0x00001004, 0x00001000, 0xc4000000, + 0x00001005, 0x00002000, 0x00000000, + 0x1cb81000, 0x00398a88, 0x00000003, + 0x140f803e, 0x00000000, 0xe2010100, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* pre-compiled fragment shader program +* +* precision highp float; +* uniform vec4 clear_color; +* void main(void) +* { +* gl_FragColor = clear_color; +* } +*/ + +#define GMEM2SYS_FRAG_PGM_LEN 0x0c + +static unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = { + 0x00000000, 0x1002c400, 0x10000000, + 0x00001003, 0x00002000, 0x00000000, + 0x140f8000, 0x00000000, 0x22000000, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* context restore (sys -> gmem) */ +/* pre-compiled vertex shader program +* +* attribute vec4 position; +* attribute vec4 texcoord; +* varying vec4 texcoord0; +* void main() +* { +* gl_Position = position; +* texcoord0 = texcoord; +* } +*/ + +#define SYS2GMEM_VTX_PGM_LEN 0x18 + +static unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = { + 0x00052003, 0x00001000, 0xc2000000, 0x00001005, + 0x00001000, 0xc4000000, 0x00001006, 0x10071000, + 0x20000000, 0x18981000, 0x0039ba88, 0x00000003, + 0x12982000, 0x40257b08, 0x00000002, 0x140f803e, + 0x00000000, 0xe2010100, 0x140f8000, 0x00000000, + 0xe2020200, 0x14000000, 0x00000000, 0xe2000000 +}; + +/* pre-compiled fragment shader program +* +* precision mediump float; +* uniform sampler2D tex0; +* varying vec4 texcoord0; +* void main() +* { +* gl_FragColor = texture2D(tex0, texcoord0.xy); +* } +*/ + +#define SYS2GMEM_FRAG_PGM_LEN 0x0f + +static unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = { + 0x00011002, 0x00001000, 0xc4000000, 0x00001003, + 0x10041000, 0x20000000, 0x10000001, 0x1ffff688, + 0x00000002, 0x140f8000, 0x00000000, 0xe2000000, + 0x14000000, 0x00000000, 0xe2000000 +}; + +/* shader texture constants (sysmem -> gmem) */ +#define SYS2GMEM_TEX_CONST_LEN 6 + +static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = { + /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, + * RFMode=ZeroClamp-1, Dim=1:2d + */ + 0x00000002, /* Pitch = TBD */ + + /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, + * NearestClamp=1:OGL Mode + */ + 0x00000800, /* Address[31:12] = TBD */ + + /* Width, Height, EndianSwap=0:None */ + 0, /* Width & Height = TBD */ + + /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, + * Mip=2:BaseMap + */ + 0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23, + + /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, + * Dim3d=0 + */ + 0, + + /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, + * Dim=1:2d, MipPacking=0 + */ + 1 << 9 /* Mip Address[31:12] = TBD */ +}; + +#define NUM_COLOR_FORMATS 13 + +static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = { + FMT_4_4_4_4, /* 
COLORX_4_4_4_4 */ + FMT_1_5_5_5, /* COLORX_1_5_5_5 */ + FMT_5_6_5, /* COLORX_5_6_5 */ + FMT_8, /* COLORX_8 */ + FMT_8_8, /* COLORX_8_8 */ + FMT_8_8_8_8, /* COLORX_8_8_8_8 */ + FMT_8_8_8_8, /* COLORX_S8_8_8_8 */ + FMT_16_FLOAT, /* COLORX_16_FLOAT */ + FMT_16_16_FLOAT, /* COLORX_16_16_FLOAT */ + FMT_16_16_16_16_FLOAT, /* COLORX_16_16_16_16_FLOAT */ + FMT_32_FLOAT, /* COLORX_32_FLOAT */ + FMT_32_32_FLOAT, /* COLORX_32_32_FLOAT */ + FMT_32_32_32_32_FLOAT, /* COLORX_32_32_32_32_FLOAT */ +}; + +static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = { + 2, /* COLORX_4_4_4_4 */ + 2, /* COLORX_1_5_5_5 */ + 2, /* COLORX_5_6_5 */ + 1, /* COLORX_8 */ + 2, /* COLORX_8_8 8*/ + 4, /* COLORX_8_8_8_8 */ + 4, /* COLORX_S8_8_8_8 */ + 2, /* COLORX_16_FLOAT */ + 4, /* COLORX_16_16_FLOAT */ + 8, /* COLORX_16_16_16_16_FLOAT */ + 4, /* COLORX_32_FLOAT */ + 8, /* COLORX_32_32_FLOAT */ + 16, /* COLORX_32_32_32_32_FLOAT */ +}; + +/* shader linkage info */ +#define SHADER_CONST_ADDR (11 * 6 + 3) + + +static unsigned int *program_shader(unsigned int *cmds, int vtxfrag, + unsigned int *shader_pgm, int dwords) +{ + /* load the patched vertex shader stream */ + *cmds++ = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords); + /* 0=vertex shader, 1=fragment shader */ + *cmds++ = vtxfrag; + /* instruction start & size (in 32-bit words) */ + *cmds++ = ((0 << 16) | dwords); + + memcpy(cmds, shader_pgm, dwords << 2); + cmds += dwords; + + return cmds; +} + +static unsigned int *reg_to_mem(unsigned int *cmds, uint32_t dst, + uint32_t src, int dwords) +{ + while (dwords-- > 0) { + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = src++; + *cmds++ = dst; + dst += 4; + } + + return cmds; +} + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + +static void build_reg_to_mem_range(unsigned int start, unsigned int end, + unsigned int **cmd, + struct adreno_context *drawctxt) +{ + unsigned int i = start; + + for (i = start; i <= end; i++) { + *(*cmd)++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *(*cmd)++ = i; + *(*cmd)++ = + ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) + + (i - 0x2000) * 4; + } +} + +#endif + +/* chicken restore */ +static unsigned int *build_chicken_restore_cmds( + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmds = start; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate); + *cmds++ = 0x00000000; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds); + + return cmds; +} + +/****************************************************************************/ +/* context save */ +/****************************************************************************/ + +static const unsigned int register_ranges_a20x[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_VGT_MAX_VTX_INDX, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL, + REG_PA_SU_POINT_SIZE, REG_PA_SC_LINE_STIPPLE, + REG_PA_SC_VIZ_QUERY, REG_PA_SC_VIZ_QUERY, + REG_VGT_VERTEX_REUSE_BLOCK_CNTL, 
REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a220[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A220_PC_INDX_OFFSET, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a225[] = { + REG_RB_SURFACE_INFO, REG_A225_RB_COLOR_INFO3, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A225_PC_MULTI_PRIM_IB_RESET_INDX, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR, + REG_A225_GRAS_UCP0X, REG_A225_GRAS_UCP_ENABLED +}; + + +/* save h/w regs, alu constants, texture contants, etc. ... +* requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr +*/ +static void build_regsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Make sure the HW context has the correct register values + * before reading them. */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + + { + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + /* Based on chip id choose the register ranges */ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + /* Write HW registers into shadow */ + for (i = 0; i < (reg_array_size/2) ; i++) { + build_reg_to_mem_range(ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1], + &cmd, drawctxt); + } + } + + /* Copy ALU constants */ + cmd = + reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, + REG_SQ_CONSTANT_0, ALU_CONSTANTS); + + /* Copy Tex constants */ + cmd = + reg_to_mem(cmd, + (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, + REG_SQ_FETCH_0, TEX_CONSTANTS); +#else + + /* Insert a wait for idle packet before reading the registers. 
+ * This is to fix a hang/reset seen during stress testing. In this + * hang, CP encountered a timeout reading SQ's boolean constant + * register. There is logic in the HW that blocks reading of this + * register when the SQ block is not idle, which we believe is + * contributing to the hang.*/ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/w registers are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* ALU constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; + *cmd++ = 0 << 16; /* ALU, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* Tex constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; + *cmd++ = 1 << 16; /* Tex, start=0 */ + *cmd++ = 0x0; /* count = 0 */ +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_GPR_MANAGEMENT; + *cmd++ = tmp_ctx.reg_values[0]; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_TP0_CHICKEN; + *cmd++ = tmp_ctx.reg_values[1]; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = i; + *cmd++ = tmp_ctx.reg_values[j]; + j++; + } + } + + /* Copy Boolean constants */ + cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS, + BOOL_CONSTANTS); + + /* Copy Loop constants */ + cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow, + REG_SQ_CF_LOOP, LOOP_CONSTANTS); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/*copy colour, depth, & stencil buffers from graphics memory to system memory*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_save_commands; + unsigned int *start = cmds; + /* Calculate the new offset based on the adjusted base */ + unsigned int bytesperpixel = format2bytesperpixel[shadow->format]; + unsigned int addr = shadow->gmemshadow.gpuaddr; + unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + + /* program shader */ + + /* load shader vtx constants ... 5 dwords */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR; + *cmds++ = 0; + /* valid(?) 
vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* REG_VGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* REG_VGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* REG_VGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00003F00; + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* load the patched vertex shader stream */ + cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x10018001; + else + *cmds++ = 0x10010001; + *cmds++ = 0x00000008; + + /* resolve */ + + /* PA_CL_VTE_CNTL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + /* gmem base assumed 4K aligned. 
*/ + BUG_ON(tmp_ctx.gmem_base & 0xFFF); + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* disable Z */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x08; + else + *cmds++ = 0; + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* Use maximum scissor values -- quad vertices already have the + * correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = (0x1fff << 16) | (0x1fff); + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = (0x1fff << 16) | (0x1fff); + + /* load the viewport so that z scale = clear depth and + * z offset = 0.0f + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; /* -1.0f */ + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + + /* load the COPY state */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(REG_RB_COPY_CONTROL); + *cmds++ = 0; /* RB_COPY_CONTROL */ + *cmds++ = addr & 0xfffff000; /* RB_COPY_DEST_BASE */ + *cmds++ = shadow->pitch >> 5; /* RB_COPY_DEST_PITCH */ + + /* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither, + * MaskWrite:R=G=B=A=1 + */ + *cmds++ = 0x0003c008 | + (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT); + /* Make sure we stay in offsetx field. */ + BUG_ON(offset & 0xfffff000); + *cmds++ = offset; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + *cmds++ = 0x6; /* EDRAM copy */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +/* context restore */ + +/*copy colour, depth, & stencil buffers from system memory to graphics memory*/ +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_restore_commands; + unsigned int *start = cmds; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + /* shader constants */ + + /* vertex buffer constants */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + + *cmds++ = (0x1 << 16) | (9 * 6); + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; + /* limit = 8 dwords */ + *cmds++ = 0x00000020; + *cmds++ = 0; + *cmds++ = 0; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN); + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + *cmds++ = 0x10030002; + *cmds++ = 0x00000008; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + if (!adreno_is_a22x(adreno_dev)) { + /* PA_SC_VIZ_QUERY */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY); + *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */ + } + + /* RB_COLORCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL); + *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */ + *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL); + *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_CONFIG); + *cmds++ = 
0x00000000; /* REG_PA_SC_AA_CONFIG */ + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* texture constants */ + *cmds++ = + cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1)); + *cmds++ = (0x1 << 16) | (0 * 6); + memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2); + cmds[0] |= (shadow->pitch >> 5) << 22; + cmds[1] |= + shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format]; + cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13; + cmds += SYS2GMEM_TEX_CONST_LEN; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* RB_DEPTHCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 8; /* disable Z */ + else + *cmds++ = 0; /* disable Z */ + + /* Use maximum scissor values -- quad vertices already + * have the correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = ((0x1fff) << 16) | 0x1fff; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = ((0x1fff) << 16) | 0x1fff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /*load the viewport so that z scale = clear depth and z offset = 0.0f */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + /* draw pixels with color and depth/stencil component */ + *cmds++ = 0x4; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/W Registers */ + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + /* Based on chip id choose the registers ranges*/ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + for (i = 0; i < (reg_array_size/2); i++) { + cmd = reg_range(cmd, ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1]); + } + + /* Now we know how many register blocks we have, we can compute command + * length + */ + start[2] = + cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); + /* Enable shadowing for the entire register block. */ +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + start[4] |= (1 << 24) | (4 << 16); +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); + tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00040400; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + *cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type0_packet(i, 1); + tmp_ctx.reg_values[j] = virt2gpu(cmd, + &drawctxt->gpustate); + *cmd++ = 0x00000000; + j++; + } + } + + /* ALU Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */ +#else + *cmd++ = (1 << 24) | (0 << 16) | 0; +#endif + *cmd++ = ALU_CONSTANTS; + + /* Texture Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Disable shadowing */ + *cmd++ = (0 << 24) | (1 << 16) | 0; +#else + *cmd++ = (1 << 24) | (1 << 16) | 0; +#endif + *cmd++ = TEX_CONSTANTS; + + /* Boolean Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS); + *cmd++ = (2 << 16) | 0; + + /* the next BOOL_CONSTANT dwords is the shadow area for + * boolean constants. 
+ */ + tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += BOOL_CONSTANTS; + + /* Loop Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS); + *cmd++ = (3 << 16) | 0; + + /* the next LOOP_CONSTANTS dwords is the shadow area for + * loop constants. + */ + tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += LOOP_CONSTANTS; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static void +build_shader_save_restore_cmds(struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *save, *restore, *fixup; + unsigned int *startSizeVtx, *startSizePix, *startSizeShared; + unsigned int *partition1; + unsigned int *shaderBases, *partition2; + + /* compute vertex, pixel and shared instruction shadow GPU addresses */ + tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; + tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + + /* restore shader partitioning and instructions */ + + restore = cmd; /* start address */ + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + /* Restore previous shader vertex & pixel instruction bases. */ + *cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + partition1 = cmd++; /* TBD #4a: partition info (from save) */ + + /* load vertex shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + startSizeVtx = cmd++; /* TBD #1: start/size (from save) */ + + /* load pixel shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + startSizePix = cmd++; /* TBD #2: start/size (from save) */ + + /* load shared shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + startSizeShared = cmd++; /* TBD #3: start/size (from save) */ + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); + + /* + * fixup SET_SHADER_BASES data + * + * since self-modifying PM4 code is being used here, a seperate + * command buffer is used for this fixup operation, to ensure the + * commands are not read by the PM4 engine before the data fields + * have been written. + */ + + fixup = cmd; /* start address */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1); + partition2 = cmd++; /* TBD #4b: partition info (from save) */ + + /* mask off unused bits, then OR with shader instruction memory size */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = REG_SCRATCH_REG2; + /* AND off invalid bits. 
*/ + *cmd++ = 0x0FFF0FFF; + /* OR in instruction memory size */ + *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + + /* write the computed value to the SET_SHADER_BASES data field */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SCRATCH_REG2; + /* TBD #5: shader bases (to restore) */ + *cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); + + /* save shader partitioning and instructions */ + + save = cmd; /* start address */ + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* fetch the SQ_INST_STORE_MANAGMENT register value, + * store the value in the data fields of the SET_CONSTANT commands + * above. + */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4a: partition info (to restore) */ + *cmd++ = virt2gpu(partition1, &drawctxt->gpustate); + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4b: partition info (to fixup) */ + *cmd++ = virt2gpu(partition2, &drawctxt->gpustate); + + + /* store the vertex shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + /* TBD #1: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate); + + /* store the pixel shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + /* TBD #2: start/size (to restore) */ + *cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate); + + /* store the shared shader instructions if vertex base is nonzero */ + + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + /* TBD #3: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate); + + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, save, cmd); + + tmp_ctx.cmd = cmd; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + /* Allocate vmalloc memory to store the gpustate */ + result = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (result) + return result; + + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + /* Blank out h/w register, constant, and command buffer shadows. 
*/ + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + + /* set-up command and vertex buffer pointers */ + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + /* build indirect command buffers to save & restore regs/constants */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + build_regrestore_cmds(adreno_dev, drawctxt); + build_regsave_cmds(adreno_dev, drawctxt); + + build_shader_save_restore_cmds(drawctxt); + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, drawctxt->context_gmem_shadow.size); + + if (result) + return result; + + /* we've allocated the shadow, when swapped out, GMEM must be saved. */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + + /* blank out gmem shadow. */ + kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, + drawctxt->context_gmem_shadow.size); + + /* build quad vertex buffer */ + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* build TP0_CHICKEN register restore command buffer */ + tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); + + /* build indirect command buffers to save & restore gmem */ + /* Idle because we are reading PM override registers */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + drawctxt->context_gmem_shadow.gmem_restore_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, + &drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->context_gmem_shadow.gmemshadow.gpuaddr, + drawctxt->context_gmem_shadow.gmemshadow.size, false); + + return 0; +} + +static void a2xx_ctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + KGSL_CTXT_INFO(device, + "active context flags %08x\n", context->flags); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* save shader partitioning and instructions. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->shader_save, 3); + + /* fixup shader partitioning parameter for + * SET_SHADER_BASES. + */ + adreno_ringbuffer_issuecmds(device, 0, + context->shader_fixup, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* save gmem. + * (note: changes shader. shader must already be saved.) 
+ */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_save, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default apgetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); + cmds[4] = (unsigned int) context; + adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + +#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->gpustate.gpuaddr, LCC_SHADOW_SIZE + + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false); +#endif + + /* restore gmem. + * (note: changes shader. shader must not already be restored.) + */ + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_restore, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + /* restore registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, + context->reg_restore, 3); + + /* restore shader instructions & partitioning. */ + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { + adreno_ringbuffer_issuecmds(device, 0, + context->shader_restore, 3); + } + + if (adreno_is_a20x(adreno_dev)) { + cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); + cmds[1] = context->bin_base_offset; + adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + } +} + +/* + * Interrupt management + * + * a2xx interrupt control is distributed among the various + * hardware components (RB, CP, MMU). The main interrupt + * tells us which component fired the interrupt, but one needs + * to go to the individual component to find out why. 
The + * following functions provide the broken out support for + * managing the interrupts + */ + +#define RBBM_INT_MASK RBBM_INT_CNTL__RDERR_INT_MASK + +#define CP_INT_MASK \ + (CP_INT_CNTL__T0_PACKET_IN_IB_MASK | \ + CP_INT_CNTL__OPCODE_ERROR_MASK | \ + CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK | \ + CP_INT_CNTL__RESERVED_BIT_ERROR_MASK | \ + CP_INT_CNTL__IB_ERROR_MASK | \ + CP_INT_CNTL__IB1_INT_MASK | \ + CP_INT_CNTL__RB_INT_MASK) + +#define VALID_STATUS_COUNT_MAX 10 + +static struct { + unsigned int mask; + const char *message; +} kgsl_cp_error_irqs[] = { + { CP_INT_CNTL__T0_PACKET_IN_IB_MASK, + "ringbuffer TO packet in IB interrupt" }, + { CP_INT_CNTL__OPCODE_ERROR_MASK, + "ringbuffer opcode error interrupt" }, + { CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK, + "ringbuffer protected mode error interrupt" }, + { CP_INT_CNTL__RESERVED_BIT_ERROR_MASK, + "ringbuffer reserved bit error interrupt" }, + { CP_INT_CNTL__IB_ERROR_MASK, + "ringbuffer IB error interrupt" }, +}; + +static void a2xx_cp_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0, num_reads = 0, master_status = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + int i; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &master_status); + while (!status && (num_reads < VALID_STATUS_COUNT_MAX) && + (master_status & MASTER_INT_SIGNAL__CP_INT_STAT)) { + adreno_regread(device, REG_CP_INT_STATUS, &status); + adreno_regread(device, REG_MASTER_INT_SIGNAL, + &master_status); + num_reads++; + } + if (num_reads > 1) + KGSL_DRV_WARN(device, + "Looped %d times to read REG_CP_INT_STATUS\n", + num_reads); + if (!status) { + if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) { + /* This indicates that we could not read CP_INT_STAT. + * As a precaution just wake up processes so + * they can check their timestamps. Since, we + * did not ack any interrupts this interrupt will + * be generated again */ + KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n"); + wake_up_interruptible_all(&device->wait_queue); + } else + KGSL_DRV_WARN(device, "Spurious interrput detected\n"); + return; + } + + if (status & CP_INT_CNTL__RB_INT_MASK) { + /* signal intr completion event */ + unsigned int enableflag = 0; + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + enableflag); + wmb(); + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + for (i = 0; i < ARRAY_SIZE(kgsl_cp_error_irqs); i++) { + if (status & kgsl_cp_error_irqs[i].mask) { + KGSL_CMD_CRIT(rb->device, "%s\n", + kgsl_cp_error_irqs[i].message); + /* + * on fatal errors, turn off the interrupts to + * avoid storming. 
This has the side effect of + * forcing a PM dump when the timestamp times out + */ + + kgsl_pwrctrl_irq(rb->device, KGSL_PWRFLAGS_OFF); + } + } + + /* only ack bits we understand */ + status &= CP_INT_MASK; + adreno_regwrite(device, REG_CP_INT_ACK, status); + + if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) { + KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n"); + queue_work(device->work_queue, &device->ts_expired_ws); + wake_up_interruptible_all(&device->wait_queue); + atomic_notifier_call_chain(&(device->ts_notifier_list), + device->id, + NULL); + } +} + +static void a2xx_rbbm_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0; + unsigned int rderr = 0; + + adreno_regread(device, REG_RBBM_INT_STATUS, &status); + + if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { + union rbbm_read_error_u rerr; + adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); + rerr.val = rderr; + if (rerr.f.read_address == REG_CP_INT_STATUS && + rerr.f.read_error && + rerr.f.read_requester) + KGSL_DRV_WARN(device, + "rbbm read error interrupt: %08x\n", rderr); + else + KGSL_DRV_CRIT(device, + "rbbm read error interrupt: %08x\n", rderr); + } + + status &= RBBM_INT_MASK; + adreno_regwrite(device, REG_RBBM_INT_ACK, status); +} + +irqreturn_t a2xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t result = IRQ_NONE; + unsigned int status; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &status); + + if (status & MASTER_INT_SIGNAL__MH_INT_STAT) { + kgsl_mh_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__CP_INT_STAT) { + a2xx_cp_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__RBBM_INT_STAT) { + a2xx_rbbm_intrcallback(device); + result = IRQ_HANDLED; + } + + return result; +} + +static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) { + adreno_regwrite(device, REG_RBBM_INT_CNTL, RBBM_INT_MASK); + adreno_regwrite(device, REG_CP_INT_CNTL, CP_INT_MASK); + adreno_regwrite(device, MH_INTERRUPT_MASK, KGSL_MMU_INT_MASK); + } else { + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, MH_INTERRUPT_MASK, 0); + } + + /* Force the writes to post before touching the IRQ line */ + wmb(); +} + +struct adreno_gpudev adreno_a2xx_gpudev = { + .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, + .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, + .ctxt_save = a2xx_ctxt_save, + .ctxt_restore = a2xx_ctxt_restore, + .irq_handler = a2xx_irq_handler, + .irq_control = a2xx_irq_control, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c new file mode 100644 index 00000000..9e7ef61d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -0,0 +1,766 @@ +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +#include "adreno_ringbuffer.h" + +static ssize_t +sysfs_show_ptpool_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->entries); +} + +static ssize_t +sysfs_show_ptpool_min(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", + pool->static_entries); +} + +static ssize_t +sysfs_show_ptpool_chunks(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->chunks); +} + +static ssize_t +sysfs_show_ptpool_ptsize(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->ptsize); +} + +static struct kobj_attribute attr_ptpool_entries = { + .attr = { .name = "ptpool_entries", .mode = 0444 }, + .show = sysfs_show_ptpool_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_min = { + .attr = { .name = "ptpool_min", .mode = 0444 }, + .show = sysfs_show_ptpool_min, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_chunks = { + .attr = { .name = "ptpool_chunks", .mode = 0444 }, + .show = sysfs_show_ptpool_chunks, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_ptsize = { + .attr = { .name = "ptpool_ptsize", .mode = 0444 }, + .show = sysfs_show_ptpool_ptsize, + .store = NULL, +}; + +static struct attribute *ptpool_attrs[] = { + &attr_ptpool_entries.attr, + &attr_ptpool_min.attr, + &attr_ptpool_chunks.attr, + &attr_ptpool_ptsize.attr, + NULL, +}; + +static struct attribute_group ptpool_attr_group = { + .attrs = ptpool_attrs, +}; + +static int +_kgsl_ptpool_add_entries(struct kgsl_ptpool *pool, int count, int dynamic) +{ + struct kgsl_ptpool_chunk *chunk; + size_t size = ALIGN(count * pool->ptsize, PAGE_SIZE); + + BUG_ON(count == 0); + + if (get_order(size) >= MAX_ORDER) { + KGSL_CORE_ERR("ptpool allocation is too big: %d\n", size); + return -EINVAL; + } + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (chunk == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*chunk)); + return -ENOMEM; + } + + chunk->size = size; + chunk->count = count; + chunk->dynamic = dynamic; + + chunk->data = dma_alloc_coherent(NULL, size, + &chunk->phys, GFP_KERNEL); + + if (chunk->data == NULL) { + KGSL_CORE_ERR("dma_alloc_coherent(%d) failed\n", size); + goto err; + } + + chunk->bitmap = kzalloc(BITS_TO_LONGS(count) * 4, GFP_KERNEL); + + if (chunk->bitmap == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + BITS_TO_LONGS(count) * 4); + goto err_dma; + } + + list_add_tail(&chunk->list, &pool->list); + + pool->chunks++; + pool->entries += count; + + if (!dynamic) + pool->static_entries += count; + + return 0; + +err_dma: + dma_free_coherent(NULL, chunk->size, chunk->data, chunk->phys); +err: + kfree(chunk); + return -ENOMEM; +} + +static void * +_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr) +{ + struct kgsl_ptpool_chunk *chunk; + + list_for_each_entry(chunk, &pool->list, list) { + int bit = find_first_zero_bit(chunk->bitmap, 
chunk->count); + + if (bit >= chunk->count) + continue; + + set_bit(bit, chunk->bitmap); + *physaddr = chunk->phys + (bit * pool->ptsize); + + return chunk->data + (bit * pool->ptsize); + } + + return NULL; +} + +/** + * kgsl_ptpool_add + * @pool: A pointer to a ptpool structure + * @entries: Number of entries to add + * + * Add static entries to the pagetable pool. + */ + +static int +kgsl_ptpool_add(struct kgsl_ptpool *pool, int count) +{ + int ret = 0; + BUG_ON(count == 0); + + mutex_lock(&pool->lock); + + /* Only 4MB can be allocated in one chunk, so larger allocations + need to be split into multiple sections */ + + while (count) { + int entries = ((count * pool->ptsize) > SZ_4M) ? + SZ_4M / pool->ptsize : count; + + /* Add the entries as static, i.e. they don't ever stand + a chance of being removed */ + + ret = _kgsl_ptpool_add_entries(pool, entries, 0); + if (ret) + break; + + count -= entries; + } + + mutex_unlock(&pool->lock); + return ret; +} + +/** + * kgsl_ptpool_alloc + * @pool: A pointer to a ptpool structure + * @addr: A pointer to store the physical address of the chunk + * + * Allocate a pagetable from the pool. Returns the virtual address + * of the pagetable, the physical address is returned in physaddr + */ + +static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool, + unsigned int *physaddr) +{ + void *addr = NULL; + int ret; + + mutex_lock(&pool->lock); + addr = _kgsl_ptpool_get_entry(pool, physaddr); + if (addr) + goto done; + + /* Add a chunk for 1 more pagetable and mark it as dynamic */ + ret = _kgsl_ptpool_add_entries(pool, 1, 1); + + if (ret) + goto done; + + addr = _kgsl_ptpool_get_entry(pool, physaddr); +done: + mutex_unlock(&pool->lock); + return addr; +} + +static inline void _kgsl_ptpool_rm_chunk(struct kgsl_ptpool_chunk *chunk) +{ + list_del(&chunk->list); + + if (chunk->data) + dma_free_coherent(NULL, chunk->size, chunk->data, + chunk->phys); + kfree(chunk->bitmap); + kfree(chunk); +} + +/** + * kgsl_ptpool_free + * @pool: A pointer to a ptpool structure + * @addr: A pointer to the virtual address to free + * + * Free a pagetable allocated from the pool + */ + +static void kgsl_ptpool_free(struct kgsl_ptpool *pool, void *addr) +{ + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL || addr == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) { + if (addr >= chunk->data && + addr < chunk->data + chunk->size) { + int bit = ((unsigned long) (addr - chunk->data)) / + pool->ptsize; + + clear_bit(bit, chunk->bitmap); + memset(addr, 0, pool->ptsize); + + if (chunk->dynamic && + bitmap_empty(chunk->bitmap, chunk->count)) + _kgsl_ptpool_rm_chunk(chunk); + + break; + } + } + + mutex_unlock(&pool->lock); +} + +void kgsl_gpummu_ptpool_destroy(void *ptpool) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *)ptpool; + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) + _kgsl_ptpool_rm_chunk(chunk); + mutex_unlock(&pool->lock); + + kfree(pool); +} + +/** + * kgsl_ptpool_init + * @pool: A pointer to a ptpool structure to initialize + * @ptsize: The size of each pagetable entry + * @entries: The number of inital entries to add to the pool + * + * Initalize a pool and allocate an initial chunk of entries. 
+ */ +void *kgsl_gpummu_ptpool_init(int ptsize, int entries) +{ + struct kgsl_ptpool *pool; + int ret = 0; + BUG_ON(ptsize == 0); + + pool = kzalloc(sizeof(struct kgsl_ptpool), GFP_KERNEL); + if (!pool) { + KGSL_CORE_ERR("Failed to allocate memory " + "for ptpool\n"); + return NULL; + } + + pool->ptsize = ptsize; + mutex_init(&pool->lock); + INIT_LIST_HEAD(&pool->list); + + if (entries) { + ret = kgsl_ptpool_add(pool, entries); + if (ret) + goto err_ptpool_remove; + } + + ret = sysfs_create_group(kgsl_driver.ptkobj, &ptpool_attr_group); + if (ret) { + KGSL_CORE_ERR("sysfs_create_group failed for ptpool " + "statistics: %d\n", ret); + goto err_ptpool_remove; + } + return (void *)pool; + +err_ptpool_remove: + kgsl_gpummu_ptpool_destroy(pool); + return NULL; +} + +int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + struct kgsl_gpummu_pt *gpummu_pt = pt->priv; + return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); +} + +void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) +{ + struct kgsl_gpummu_pt *gpummu_pt = (struct kgsl_gpummu_pt *) + mmu_specific_pt; + kgsl_ptpool_free((struct kgsl_ptpool *)kgsl_driver.ptpool, + gpummu_pt->base.hostptr); + + kgsl_driver.stats.coherent -= KGSL_PAGETABLE_SIZE; + + kfree(gpummu_pt->tlbflushfilter.base); + + kfree(gpummu_pt); +} + +static inline uint32_t +kgsl_pt_entry_get(unsigned int va_base, uint32_t va) +{ + return (va - va_base) >> PAGE_SHIFT; +} + +static inline void +kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) +{ + uint32_t *baseptr = (uint32_t *)pt->base.hostptr; + + writel_relaxed(val, &baseptr[pte]); +} + +static inline uint32_t +kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) +{ + uint32_t *baseptr = (uint32_t *)pt->base.hostptr; + return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; +} + +static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, + enum kgsl_deviceid id) +{ + unsigned int result = 0; + struct kgsl_gpummu_pt *gpummu_pt; + + if (pt == NULL) + return 0; + gpummu_pt = pt->priv; + + spin_lock(&pt->lock); + if (gpummu_pt->tlb_flags && (1<tlb_flags &= ~(1<lock); + return result; +} + +static void kgsl_gpummu_pagefault(struct kgsl_device *device) +{ + unsigned int reg; + unsigned int ptbase; + + kgsl_regread(device, MH_MMU_PAGE_FAULT, ®); + kgsl_regread(device, MH_MMU_PT_BASE, &ptbase); + + KGSL_MEM_CRIT(device, + "mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n", + reg & ~(PAGE_SIZE - 1), + kgsl_mmu_get_ptname_from_ptbase(ptbase), + reg & 0x02 ? 
"WRITE" : "READ", (reg >> 4) & 0xF); +} + +static void *kgsl_gpummu_create_pagetable(void) +{ + struct kgsl_gpummu_pt *gpummu_pt; + + gpummu_pt = kzalloc(sizeof(struct kgsl_gpummu_pt), + GFP_KERNEL); + if (!gpummu_pt) + return NULL; + + gpummu_pt->tlb_flags = 0; + gpummu_pt->last_superpte = 0; + + gpummu_pt->tlbflushfilter.size = (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE / + (PAGE_SIZE * GSL_PT_SUPER_PTE * 8)) + 1; + gpummu_pt->tlbflushfilter.base = (unsigned int *) + kzalloc(gpummu_pt->tlbflushfilter.size, GFP_KERNEL); + if (!gpummu_pt->tlbflushfilter.base) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + gpummu_pt->tlbflushfilter.size); + goto err_free_gpummu; + } + GSL_TLBFLUSH_FILTER_RESET(); + + gpummu_pt->base.hostptr = kgsl_ptpool_alloc((struct kgsl_ptpool *) + kgsl_driver.ptpool, + &gpummu_pt->base.physaddr); + + if (gpummu_pt->base.hostptr == NULL) + goto err_flushfilter; + + /* ptpool allocations are from coherent memory, so update the + device statistics acordingly */ + + KGSL_STATS_ADD(KGSL_PAGETABLE_SIZE, kgsl_driver.stats.coherent, + kgsl_driver.stats.coherent_max); + + gpummu_pt->base.gpuaddr = gpummu_pt->base.physaddr; + gpummu_pt->base.size = KGSL_PAGETABLE_SIZE; + + return (void *)gpummu_pt; + +err_flushfilter: + kfree(gpummu_pt->tlbflushfilter.base); +err_free_gpummu: + kfree(gpummu_pt); + + return NULL; +} + +static void kgsl_gpummu_default_setstate(struct kgsl_device *device, + uint32_t flags) +{ + struct kgsl_gpummu_pt *gpummu_pt; + if (!kgsl_mmu_enabled()) + return; + + if (flags & KGSL_MMUFLAGS_PTUPDATE) { + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + gpummu_pt = device->mmu.hwpagetable->priv; + kgsl_regwrite(device, MH_MMU_PT_BASE, + gpummu_pt->base.gpuaddr); + } + + if (flags & KGSL_MMUFLAGS_TLBFLUSH) { + /* Invalidate all and tc */ + kgsl_regwrite(device, MH_MMU_INVALIDATE, 0x00000003); + } +} + +static void kgsl_gpummu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) +{ + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_gpummu_pt *gpummu_pt; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* page table not current, then setup mmu to use new + * specified page table + */ + if (mmu->hwpagetable != pagetable) { + mmu->hwpagetable = pagetable; + spin_lock(&mmu->hwpagetable->lock); + gpummu_pt = mmu->hwpagetable->priv; + gpummu_pt->tlb_flags &= ~(1<id); + spin_unlock(&mmu->hwpagetable->lock); + + /* call device specific set page table */ + kgsl_setstate(mmu->device, KGSL_MMUFLAGS_TLBFLUSH | + KGSL_MMUFLAGS_PTUPDATE); + } + } +} + +static int kgsl_gpummu_init(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + + mmu->device = device; + + /* sub-client MMU lookups require address translation */ + if ((mmu->config & ~0x1) > 0) { + /*make sure virtual address range is a multiple of 64Kb */ + if (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE & ((1 << 16) - 1)) { + KGSL_CORE_ERR("Invalid pagetable size requested " + "for GPUMMU: %x\n", CONFIG_MSM_KGSL_PAGE_TABLE_SIZE); + return -EINVAL; + } + + /* allocate memory used for completing r/w operations that + * cannot be mapped by the MMU + */ + status = kgsl_allocate_contiguous(&mmu->setstate_memory, 64); + if (!status) + kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + } + + dev_info(device->dev, "|%s| MMU type set for device is GPUMMU\n", + __func__); + return status; +} + +static int kgsl_gpummu_start(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with 
the global lock held + */ + + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_gpummu_pt *gpummu_pt; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + /* MMU not enabled */ + if ((mmu->config & 0x1) == 0) + return 0; + + /* setup MMU and sub-client behavior */ + kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config); + + /* idle device */ + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + + /* enable axi interrupts */ + kgsl_regwrite(device, MH_INTERRUPT_MASK, + GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT); + + kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + + /* TRAN_ERROR needs a 32 byte (32 byte aligned) chunk of memory + * to complete transactions in case of an MMU fault. Note that + * we'll leave the bottom 32 bytes of the setstate_memory for other + * purposes (e.g. use it when dummy read cycles are needed + * for other blocks) */ + kgsl_regwrite(device, MH_MMU_TRAN_ERROR, + mmu->setstate_memory.physaddr + 32); + + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + + mmu->hwpagetable = mmu->defaultpagetable; + gpummu_pt = mmu->hwpagetable->priv; + kgsl_regwrite(device, MH_MMU_PT_BASE, + gpummu_pt->base.gpuaddr); + kgsl_regwrite(device, MH_MMU_VA_RANGE, + (KGSL_PAGETABLE_BASE | + (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE >> 16))); + kgsl_setstate(device, KGSL_MMUFLAGS_TLBFLUSH); + mmu->flags |= KGSL_FLAGS_STARTED; + + return 0; +} + +static int +kgsl_gpummu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + unsigned int numpages; + unsigned int pte, ptefirst, ptelast, superpte; + unsigned int range = memdesc->size; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + numpages = (range >> PAGE_SHIFT); + if (range & (PAGE_SIZE - 1)) + numpages++; + + ptefirst = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, gpuaddr); + ptelast = ptefirst + numpages; + + superpte = ptefirst - (ptefirst & (GSL_PT_SUPER_PTE-1)); + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / GSL_PT_SUPER_PTE); + for (pte = ptefirst; pte < ptelast; pte++) { +#ifdef VERBOSE_DEBUG + /* check if PTE exists */ + if (!kgsl_pt_map_get(gpummu_pt, pte)) + KGSL_CORE_ERR("pt entry %x is already " + "unmapped for pagetable %p\n", pte, gpummu_pt); +#endif + kgsl_pt_map_set(gpummu_pt, pte, GSL_PT_PAGE_DIRTY); + superpte = pte - (pte & (GSL_PT_SUPER_PTE - 1)); + if (pte == superpte) + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / + GSL_PT_SUPER_PTE); + } + + /* Post all writes to the pagetable */ + wmb(); + + return 0; +} + +#define SUPERPTE_IS_DIRTY(_p) \ +(((_p) & (GSL_PT_SUPER_PTE - 1)) == 0 && \ +GSL_TLBFLUSH_FILTER_ISDIRTY((_p) / GSL_PT_SUPER_PTE)) + +static int +kgsl_gpummu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + unsigned int pte; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + struct scatterlist *s; + int flushtlb = 0; + int i; + + pte = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, memdesc->gpuaddr); + + /* Flush the TLB if the first PTE isn't at the superpte boundary */ + if (pte & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + for_each_sg(memdesc->sg, s, memdesc->sglen, i) { + unsigned int paddr = sg_phys(s); + unsigned int j; + + /* Each 
sg entry might be multiple pages long */ + for (j = paddr; j < paddr + s->length; pte++, j += PAGE_SIZE) { + if (SUPERPTE_IS_DIRTY(pte)) + flushtlb = 1; + kgsl_pt_map_set(gpummu_pt, pte, j | protflags); + } + } + + /* Flush the TLB if the last PTE isn't at the superpte boundary */ + if ((pte + 1) & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + wmb(); + + if (flushtlb) { + /*set all devices as needing flushing*/ + gpummu_pt->tlb_flags = UINT_MAX; + GSL_TLBFLUSH_FILTER_RESET(); + } + + return 0; +} + +static int kgsl_gpummu_stop(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + mmu->flags &= ~KGSL_FLAGS_STARTED; + + return 0; +} + +static int kgsl_gpummu_close(struct kgsl_device *device) +{ + /* + * close device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->setstate_memory.gpuaddr) + kgsl_sharedmem_free(&mmu->setstate_memory); + + if (mmu->defaultpagetable) + kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_gpummu_get_current_ptbase(struct kgsl_device *device) +{ + unsigned int ptbase; + kgsl_regread(device, MH_MMU_PT_BASE, &ptbase); + return ptbase; +} + +struct kgsl_mmu_ops gpummu_ops = { + .mmu_init = kgsl_gpummu_init, + .mmu_close = kgsl_gpummu_close, + .mmu_start = kgsl_gpummu_start, + .mmu_stop = kgsl_gpummu_stop, + .mmu_setstate = kgsl_gpummu_setstate, + .mmu_device_setstate = kgsl_gpummu_default_setstate, + .mmu_pagefault = kgsl_gpummu_pagefault, + .mmu_get_current_ptbase = kgsl_gpummu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops gpummu_pt_ops = { + .mmu_map = kgsl_gpummu_map, + .mmu_unmap = kgsl_gpummu_unmap, + .mmu_create_pagetable = kgsl_gpummu_create_pagetable, + .mmu_destroy_pagetable = kgsl_gpummu_destroy_pagetable, + .mmu_pt_equal = kgsl_gpummu_pt_equal, + .mmu_pt_get_flags = kgsl_gpummu_pt_get_flags, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h new file mode 100644 index 00000000..46466a8d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_GPUMMU_H +#define __KGSL_GPUMMU_H + +#define GSL_PT_PAGE_BITS_MASK 0x00000007 +#define GSL_PT_PAGE_ADDR_MASK PAGE_MASK + +#define GSL_MMU_INT_MASK \ + (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ + MH_INTERRUPT_MASK__AXI_WRITE_ERROR) + +/* Macros to manage TLB flushing */ +#define GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS (sizeof(unsigned char) * 8) +#define GSL_TLBFLUSH_FILTER_GET(superpte) \ + (*((unsigned char *) \ + (((unsigned int)gpummu_pt->tlbflushfilter.base) \ + + (superpte / GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)))) +#define GSL_TLBFLUSH_FILTER_SETDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) |= 1 << \ + (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)) +#define GSL_TLBFLUSH_FILTER_ISDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) & \ + (1 << (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS))) +#define GSL_TLBFLUSH_FILTER_RESET() memset(gpummu_pt->tlbflushfilter.base,\ + 0, gpummu_pt->tlbflushfilter.size) + +extern struct kgsl_mmu_ops gpummu_ops; +extern struct kgsl_mmu_pt_ops gpummu_pt_ops; + +struct kgsl_tlbflushfilter { + unsigned int *base; + unsigned int size; +}; + +struct kgsl_gpummu_pt { + struct kgsl_memdesc base; + unsigned int last_superpte; + unsigned int tlb_flags; + /* Maintain filter to manage tlb flushing */ + struct kgsl_tlbflushfilter tlbflushfilter; +}; + +struct kgsl_ptpool_chunk { + size_t size; + unsigned int count; + int dynamic; + + void *data; + unsigned int phys; + + unsigned long *bitmap; + struct list_head list; +}; + +struct kgsl_ptpool { + size_t ptsize; + struct mutex lock; + struct list_head list; + int entries; + int static_entries; + int chunks; +}; + +void *kgsl_gpummu_ptpool_init(int ptsize, + int entries); +void kgsl_gpummu_ptpool_destroy(void *ptpool); + +static inline unsigned int kgsl_pt_get_base_addr(struct kgsl_pagetable *pt) +{ + struct kgsl_gpummu_pt *gpummu_pt = pt->priv; + return gpummu_pt->base.gpuaddr; +} +#endif /* __KGSL_GPUMMU_H */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c new file mode 100644 index 00000000..30365a3c --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -0,0 +1,333 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" + +struct kgsl_iommu { + struct device *iommu_user_dev; + int iommu_user_dev_attached; + struct device *iommu_priv_dev; + int iommu_priv_dev_attached; +}; + +static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + struct iommu_domain *domain = pt->priv; + return pt && pt_base && ((unsigned int)domain == pt_base); +} + +static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) +{ + struct iommu_domain *domain = mmu_specific_pt; + if (domain) + iommu_domain_free(domain); +} + +void *kgsl_iommu_create_pagetable(void) +{ + struct iommu_domain *domain = iommu_domain_alloc(0); + if (!domain) + KGSL_CORE_ERR("Failed to create iommu domain\n"); + + return domain; +} + +static void kgsl_detach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev_attached) { + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev_attached) { + iommu_detach_device(domain, iommu->iommu_priv_dev); + iommu->iommu_priv_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from priv dev of MMU: %p\n", + domain, mmu); + } +} + +static int kgsl_attach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + int ret = 0; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev && !iommu->iommu_user_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_user_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + goto done; + } + iommu->iommu_user_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev && !iommu->iommu_priv_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_priv_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + goto done; + } + iommu->iommu_priv_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to priv dev of MMU: %p\n", + domain, mmu); + } +done: + return ret; +} + +static int kgsl_get_iommu_ctxt(struct kgsl_iommu *iommu, + struct kgsl_device *device) +{ + int status = 0; + struct platform_device *pdev = + container_of(device->parentdev, struct platform_device, dev); + struct kgsl_device_platform_data *pdata_dev = pdev->dev.platform_data; + if (pdata_dev->iommu_user_ctx_name) + iommu->iommu_user_dev = msm_iommu_get_ctx( + pdata_dev->iommu_user_ctx_name); + if (pdata_dev->iommu_priv_ctx_name) + iommu->iommu_priv_dev = msm_iommu_get_ctx( + pdata_dev->iommu_priv_ctx_name); + if (!iommu->iommu_user_dev) { + KGSL_CORE_ERR("Failed to get user iommu dev handle for " + "device %s\n", + pdata_dev->iommu_user_ctx_name); + status = -EINVAL; + } + return status; +} + +static void kgsl_iommu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) +{ + 
struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* page table not current, then setup mmu to use new + * specified page table + */ + if (mmu->hwpagetable != pagetable) { + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + kgsl_detach_pagetable_iommu_domain(mmu); + mmu->hwpagetable = pagetable; + if (mmu->hwpagetable) + kgsl_attach_pagetable_iommu_domain(mmu); + } + } +} + +static int kgsl_iommu_init(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_iommu *iommu; + + mmu->device = device; + + iommu = kzalloc(sizeof(struct kgsl_iommu), GFP_KERNEL); + if (!iommu) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(struct kgsl_iommu)); + return -ENOMEM; + } + + iommu->iommu_priv_dev_attached = 0; + iommu->iommu_user_dev_attached = 0; + status = kgsl_get_iommu_ctxt(iommu, device); + if (status) { + kfree(iommu); + iommu = NULL; + } + mmu->priv = iommu; + + dev_info(device->dev, "|%s| MMU type set for device is IOMMU\n", + __func__); + return status; +} + +static int kgsl_iommu_start(struct kgsl_device *device) +{ + int status; + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + mmu->hwpagetable = mmu->defaultpagetable; + + status = kgsl_attach_pagetable_iommu_domain(mmu); + if (!status) + mmu->flags |= KGSL_FLAGS_STARTED; + + return status; +} + +static int +kgsl_iommu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + int ret; + unsigned int range = memdesc->size; + struct iommu_domain *domain = (struct iommu_domain *) + mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + if (range == 0 || gpuaddr == 0) + return 0; + + ret = iommu_unmap_range(domain, gpuaddr, range); + if (ret) + KGSL_CORE_ERR("iommu_unmap_range(%p, %x, %d) failed " + "with err: %d\n", domain, gpuaddr, + range, ret); + + return 0; +} + +static int +kgsl_iommu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + int ret; + unsigned int iommu_virt_addr; + struct iommu_domain *domain = mmu_specific_pt; + + BUG_ON(NULL == domain); + + + iommu_virt_addr = memdesc->gpuaddr; + + ret = iommu_map_range(domain, iommu_virt_addr, memdesc->sg, + memdesc->size, MSM_IOMMU_ATTR_NONCACHED); + if (ret) { + KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) " + "failed with err: %d\n", domain, + iommu_virt_addr, memdesc->sg, memdesc->size, + MSM_IOMMU_ATTR_NONCACHED, ret); + return ret; + } + + return ret; +} + +static int kgsl_iommu_stop(struct kgsl_device *device) +{ + /* + * stop device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* detach iommu attachment */ + kgsl_detach_pagetable_iommu_domain(mmu); + + mmu->flags &= ~KGSL_FLAGS_STARTED; + } + + return 0; +} + +static int kgsl_iommu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + if (mmu->defaultpagetable) + 
kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_iommu_get_current_ptbase(struct kgsl_device *device) +{ + /* Current base is always the hwpagetables domain as we + * do not use per process pagetables right not for iommu. + * This will change when we switch to per process pagetables. + */ + return (unsigned int)device->mmu.hwpagetable->priv; +} + +struct kgsl_mmu_ops iommu_ops = { + .mmu_init = kgsl_iommu_init, + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_stop = kgsl_iommu_stop, + .mmu_setstate = kgsl_iommu_setstate, + .mmu_device_setstate = NULL, + .mmu_pagefault = NULL, + .mmu_get_current_ptbase = kgsl_iommu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops iommu_pt_ops = { + .mmu_map = kgsl_iommu_map, + .mmu_unmap = kgsl_iommu_unmap, + .mmu_create_pagetable = kgsl_iommu_create_pagetable, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .mmu_pt_equal = kgsl_iommu_pt_equal, + .mmu_pt_get_flags = NULL, +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_idlestats.c b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c new file mode 100644 index 00000000..d5fa84ed --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c @@ -0,0 +1,221 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define MAX_CORES 4 +struct _cpu_info { + spinlock_t lock; + struct notifier_block cpu_nb; + u64 start[MAX_CORES]; + u64 end[MAX_CORES]; + int curr_freq[MAX_CORES]; + int max_freq[MAX_CORES]; +}; + +struct idlestats_priv { + char name[32]; + struct msm_idle_stats_device idledev; + struct kgsl_device *device; + struct msm_idle_pulse pulse; + struct _cpu_info cpu_info; +}; + +static int idlestats_cpufreq_notifier( + struct notifier_block *nb, + unsigned long val, void *data) +{ + struct _cpu_info *cpu = container_of(nb, + struct _cpu_info, cpu_nb); + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + spin_lock(&cpu->lock); + if (freq->cpu < num_possible_cpus()) + cpu->curr_freq[freq->cpu] = freq->new / 1000; + spin_unlock(&cpu->lock); + + return 0; +} + +static void idlestats_get_sample(struct msm_idle_stats_device *idledev, + struct msm_idle_pulse *pulse) +{ + struct kgsl_power_stats stats; + struct idlestats_priv *priv = container_of(idledev, + struct idlestats_priv, idledev); + struct kgsl_device *device = priv->device; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + mutex_lock(&device->mutex); + /* If the GPU is asleep, don't wake it up - assume that we + are idle */ + + if (!(device->state & (KGSL_STATE_SLEEP | KGSL_STATE_NAP))) { + device->ftbl->power_stats(device, &stats); + pulse->busy_start_time = pwr->time - stats.busy_time; + pulse->busy_interval = stats.busy_time; + } else { + pulse->busy_start_time = pwr->time; + pulse->busy_interval = 0; + } + pulse->wait_interval = 0; + mutex_unlock(&device->mutex); +} + +static void idlestats_busy(struct kgsl_device *device, + 
struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + int i, busy, nr_cpu = 1; + + if (priv->pulse.busy_start_time != 0) { + priv->pulse.wait_interval = 0; + /* Calculate the total CPU busy time for this GPU pulse */ + for (i = 0; i < num_possible_cpus(); i++) { + spin_lock(&priv->cpu_info.lock); + if (cpu_online(i)) { + priv->cpu_info.end[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + busy = priv->cpu_info.end[i] - + priv->cpu_info.start[i]; + /* Normalize the busy time by frequency */ + busy = priv->cpu_info.curr_freq[i] * + (busy / priv->cpu_info.max_freq[i]); + priv->pulse.wait_interval += busy; + nr_cpu++; + } + spin_unlock(&priv->cpu_info.lock); + } + priv->pulse.wait_interval /= nr_cpu; + msm_idle_stats_idle_end(&priv->idledev, &priv->pulse); + } + priv->pulse.busy_start_time = ktime_to_us(ktime_get()); +} + +static void idlestats_idle(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + int i, nr_cpu; + struct kgsl_power_stats stats; + struct idlestats_priv *priv = pwrscale->priv; + + /* This is called from within a mutex protected function, so + no additional locking required */ + device->ftbl->power_stats(device, &stats); + + /* If total_time is zero, then we don't have + any interesting statistics to store */ + if (stats.total_time == 0) { + priv->pulse.busy_start_time = 0; + return; + } + + priv->pulse.busy_interval = stats.busy_time; + nr_cpu = num_possible_cpus(); + for (i = 0; i < nr_cpu; i++) + if (cpu_online(i)) + priv->cpu_info.start[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + + msm_idle_stats_idle_start(&priv->idledev); +} + +static void idlestats_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + priv->idledev.stats->event |= MSM_IDLE_STATS_EVENT_IDLE_TIMER_EXPIRED; +} + +static int idlestats_init(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv; + struct cpufreq_policy cpu_policy; + int ret, i; + + priv = pwrscale->priv = kzalloc(sizeof(struct idlestats_priv), + GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + snprintf(priv->name, sizeof(priv->name), "idle_stats_%s", + device->name); + + priv->device = device; + + priv->idledev.name = (const char *) priv->name; + priv->idledev.get_sample = idlestats_get_sample; + + spin_lock_init(&priv->cpu_info.lock); + priv->cpu_info.cpu_nb.notifier_call = + idlestats_cpufreq_notifier; + ret = cpufreq_register_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret) + goto err; + for (i = 0; i < num_possible_cpus(); i++) { + cpufreq_frequency_table_cpuinfo(&cpu_policy, + cpufreq_frequency_get_table(i)); + priv->cpu_info.max_freq[i] = cpu_policy.max / 1000; + priv->cpu_info.curr_freq[i] = cpu_policy.max / 1000; + } + ret = msm_idle_stats_register_device(&priv->idledev); +err: + if (ret) { + kfree(pwrscale->priv); + pwrscale->priv = NULL; + } + + return ret; +} + +static void idlestats_close(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + + if (pwrscale->priv == NULL) + return; + + cpufreq_unregister_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + msm_idle_stats_deregister_device(&priv->idledev); + + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_idlestats = { + .name = "idlestats", + .init = idlestats_init, + .idle = idlestats_idle, + .busy = 
idlestats_busy, + .sleep = idlestats_sleep, + .close = idlestats_close +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c new file mode 100644 index 00000000..f3e84e45 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c @@ -0,0 +1,197 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define TZ_GOVERNOR_PERFORMANCE 0 +#define TZ_GOVERNOR_ONDEMAND 1 + +struct tz_priv { + int governor; + unsigned int no_switch_cnt; + unsigned int skip_cnt; +}; + +#define SWITCH_OFF 200 +#define SWITCH_OFF_RESET_TH 40 +#define SKIP_COUNTER 500 +#define TZ_RESET_ID 0x3 +#define TZ_UPDATE_ID 0x4 + +#ifdef CONFIG_MSM_SCM +/* Trap into the TrustZone, and call funcs there. */ +static int __secure_tz_entry(u32 cmd, u32 val) +{ + __iowmb(); + return scm_call_atomic1(SCM_SVC_IO, cmd, val); +} +#else +static int __secure_tz_entry(u32 cmd, u32 val) +{ + return 0; +} +#endif /* CONFIG_MSM_SCM */ + +static ssize_t tz_governor_show(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + char *buf) +{ + struct tz_priv *priv = pwrscale->priv; + int ret; + + if (priv->governor == TZ_GOVERNOR_ONDEMAND) + ret = snprintf(buf, 10, "ondemand\n"); + else + ret = snprintf(buf, 13, "performance\n"); + + return ret; +} + +static ssize_t tz_governor_store(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + const char *buf, size_t count) +{ + char str[20]; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = sscanf(buf, "%20s", str); + if (ret != 1) + return -EINVAL; + + mutex_lock(&device->mutex); + + if (!strncmp(str, "ondemand", 8)) + priv->governor = TZ_GOVERNOR_ONDEMAND; + else if (!strncmp(str, "performance", 11)) + priv->governor = TZ_GOVERNOR_PERFORMANCE; + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel); + + mutex_unlock(&device->mutex); + return count; +} + +PWRSCALE_POLICY_ATTR(governor, 0644, tz_governor_show, tz_governor_store); + +static struct attribute *tz_attrs[] = { + &policy_attr_governor.attr, + NULL +}; + +static struct attribute_group tz_attr_group = { + .attrs = tz_attrs, +}; + +static void tz_wake(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + if (device->state != KGSL_STATE_NAP && + priv->governor == TZ_GOVERNOR_ONDEMAND) + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.thermal_pwrlevel); +} + +static void tz_idle(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_power_stats stats; + int val; + + /* In "performance" mode the clock speed always stays + the same */ + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + return; + + device->ftbl->power_stats(device, &stats); + if (stats.total_time == 0) + return; + + /* If the 
GPU has stayed in turbo mode for a while, * + * stop writing out values. */ + if (pwr->active_pwrlevel == 0) { + if (priv->no_switch_cnt > SWITCH_OFF) { + priv->skip_cnt++; + if (priv->skip_cnt > SKIP_COUNTER) { + priv->no_switch_cnt -= SWITCH_OFF_RESET_TH; + priv->skip_cnt = 0; + } + return; + } + priv->no_switch_cnt++; + } else { + priv->no_switch_cnt = 0; + } + + val = __secure_tz_entry(TZ_UPDATE_ID, + stats.total_time - stats.busy_time); + if (val) + kgsl_pwrctrl_pwrlevel_change(device, + pwr->active_pwrlevel + val); +} + +static void tz_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + + __secure_tz_entry(TZ_RESET_ID, 0); + priv->no_switch_cnt = 0; +} + +static int tz_init(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv; + + /* Trustzone is only valid for some SOCs */ + if (!(cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930())) + return -EINVAL; + + priv = pwrscale->priv = kzalloc(sizeof(struct tz_priv), GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + priv->governor = TZ_GOVERNOR_ONDEMAND; + kgsl_pwrscale_policy_add_files(device, pwrscale, &tz_attr_group); + + return 0; +} + +static void tz_close(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + kgsl_pwrscale_policy_remove_files(device, pwrscale, &tz_attr_group); + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_tz = { + .name = "trustzone", + .init = tz_init, + .idle = tz_idle, + .sleep = tz_sleep, + .wake = tz_wake, + .close = tz_close +}; +EXPORT_SYMBOL(kgsl_pwrscale_policy_tz); diff --git a/include/drm/kgsl_drm.h b/include/drm/kgsl_drm.h new file mode 100644 index 00000000..934bdf3f --- /dev/null +++ b/include/drm/kgsl_drm.h @@ -0,0 +1,221 @@ +/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of Code Aurora Forum, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#ifndef _KGSL_DRM_H_
+#define _KGSL_DRM_H_
+
+#include "drm.h"
+
+#define DRM_KGSL_GEM_CREATE 0x00
+#define DRM_KGSL_GEM_PREP 0x01
+#define DRM_KGSL_GEM_SETMEMTYPE 0x02
+#define DRM_KGSL_GEM_GETMEMTYPE 0x03
+#define DRM_KGSL_GEM_MMAP 0x04
+#define DRM_KGSL_GEM_ALLOC 0x05
+#define DRM_KGSL_GEM_BIND_GPU 0x06
+#define DRM_KGSL_GEM_UNBIND_GPU 0x07
+
+#define DRM_KGSL_GEM_GET_BUFINFO 0x08
+#define DRM_KGSL_GEM_SET_BUFCOUNT 0x09
+#define DRM_KGSL_GEM_SET_ACTIVE 0x0A
+#define DRM_KGSL_GEM_LOCK_HANDLE 0x0B
+#define DRM_KGSL_GEM_UNLOCK_HANDLE 0x0C
+#define DRM_KGSL_GEM_UNLOCK_ON_TS 0x0D
+#define DRM_KGSL_GEM_CREATE_FD 0x0E
+
+#define DRM_IOCTL_KGSL_GEM_CREATE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE, struct drm_kgsl_gem_create)
+
+#define DRM_IOCTL_KGSL_GEM_PREP \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_PREP, struct drm_kgsl_gem_prep)
+
+#define DRM_IOCTL_KGSL_GEM_SETMEMTYPE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SETMEMTYPE, \
+struct drm_kgsl_gem_memtype)
+
+#define DRM_IOCTL_KGSL_GEM_GETMEMTYPE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GETMEMTYPE, \
+struct drm_kgsl_gem_memtype)
+
+#define DRM_IOCTL_KGSL_GEM_MMAP \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_MMAP, struct drm_kgsl_gem_mmap)
+
+#define DRM_IOCTL_KGSL_GEM_ALLOC \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_ALLOC, struct drm_kgsl_gem_alloc)
+
+#define DRM_IOCTL_KGSL_GEM_BIND_GPU \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_BIND_GPU, struct drm_kgsl_gem_bind_gpu)
+
+#define DRM_IOCTL_KGSL_GEM_UNBIND_GPU \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNBIND_GPU, \
+struct drm_kgsl_gem_bind_gpu)
+
+#define DRM_IOCTL_KGSL_GEM_GET_BUFINFO \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GET_BUFINFO, \
+ struct drm_kgsl_gem_bufinfo)
+
+#define DRM_IOCTL_KGSL_GEM_SET_BUFCOUNT \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_BUFCOUNT, \
+ struct drm_kgsl_gem_bufcount)
+
+#define DRM_IOCTL_KGSL_GEM_SET_ACTIVE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_ACTIVE, \
+ struct drm_kgsl_gem_active)
+
+#define DRM_IOCTL_KGSL_GEM_LOCK_HANDLE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_LOCK_HANDLE, \
+struct drm_kgsl_gem_lock_handles)
+
+#define DRM_IOCTL_KGSL_GEM_UNLOCK_HANDLE \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_HANDLE, \
+struct drm_kgsl_gem_unlock_handles)
+
+#define DRM_IOCTL_KGSL_GEM_UNLOCK_ON_TS \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_ON_TS, \
+struct drm_kgsl_gem_unlock_on_ts)
+
+#define DRM_IOCTL_KGSL_GEM_CREATE_FD \
+DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE_FD, \
+struct drm_kgsl_gem_create_fd)
+
+/* Maximum number of sub buffers per GEM object */
+#define DRM_KGSL_GEM_MAX_BUFFERS 2
+
+/* Memory types - these define the source and caching policies
+   of the GEM memory chunk */
+
+/* Legacy definitions left for compatibility */
+
+#define DRM_KGSL_GEM_TYPE_EBI 0
+#define DRM_KGSL_GEM_TYPE_SMI 1
+#define DRM_KGSL_GEM_TYPE_KMEM 2
+#define DRM_KGSL_GEM_TYPE_KMEM_NOCACHE 3
+#define DRM_KGSL_GEM_TYPE_MEM_MASK 0xF
+
+/* Contiguous memory (PMEM) */
+#define DRM_KGSL_GEM_TYPE_PMEM 0x000100
+
+/* PMEM memory types */
+#define DRM_KGSL_GEM_PMEM_EBI 0x001000
+#define DRM_KGSL_GEM_PMEM_SMI 0x002000
+
+/* Standard paged memory */
+#define DRM_KGSL_GEM_TYPE_MEM 0x010000
+
+/* Caching controls */
+#define DRM_KGSL_GEM_CACHE_NONE 0x000000
+#define DRM_KGSL_GEM_CACHE_WCOMBINE 0x100000
+#define DRM_KGSL_GEM_CACHE_WTHROUGH 0x200000
+#define DRM_KGSL_GEM_CACHE_WBACK 0x400000
+#define DRM_KGSL_GEM_CACHE_WBACKWA 0x800000
+#define DRM_KGSL_GEM_CACHE_MASK 0xF00000
+
+/* FD based objects */
+#define DRM_KGSL_GEM_TYPE_FD_FBMEM 0x1000000
+#define DRM_KGSL_GEM_TYPE_FD_MASK 0xF000000
+
+/* Timestamp types */
+#define DRM_KGSL_GEM_TS_3D 0x00000430
+#define DRM_KGSL_GEM_TS_2D 0x00000180
+
+
+struct drm_kgsl_gem_create {
+	uint32_t size;
+	uint32_t handle;
+};
+
+struct drm_kgsl_gem_prep {
+	uint32_t handle;
+	uint32_t phys;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_memtype {
+	uint32_t handle;
+	uint32_t type;
+};
+
+struct drm_kgsl_gem_mmap {
+	uint32_t handle;
+	uint32_t size;
+	uint32_t hostptr;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_alloc {
+	uint32_t handle;
+	uint64_t offset;
+};
+
+struct drm_kgsl_gem_bind_gpu {
+	uint32_t handle;
+	uint32_t gpuptr;
+};
+
+struct drm_kgsl_gem_bufinfo {
+	uint32_t handle;
+	uint32_t count;
+	uint32_t active;
+	uint32_t offset[DRM_KGSL_GEM_MAX_BUFFERS];
+	uint32_t gpuaddr[DRM_KGSL_GEM_MAX_BUFFERS];
+};
+
+struct drm_kgsl_gem_bufcount {
+	uint32_t handle;
+	uint32_t bufcount;
+};
+
+struct drm_kgsl_gem_active {
+	uint32_t handle;
+	uint32_t active;
+};
+
+struct drm_kgsl_gem_lock_handles {
+	uint32_t num_handles;
+	uint32_t *handle_list;
+	uint32_t pid;
+	uint32_t lock_id; /* Returned lock id used for unlocking */
+};
+
+struct drm_kgsl_gem_unlock_handles {
+	uint32_t lock_id;
+};
+
+struct drm_kgsl_gem_unlock_on_ts {
+	uint32_t lock_id;
+	uint32_t timestamp; /* This field is a hw generated ts */
+	uint32_t type; /* Which pipe to check for ts generation */
+};
+
+struct drm_kgsl_gem_create_fd {
+	uint32_t fd;
+	uint32_t handle;
+};
+
+#endif
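
The ioctl numbers, flag bits, and argument structures above describe the whole KGSL GEM userspace interface, so a short host-side example helps show how they compose. The sketch below is not part of the patch: the /dev/dri/card0 node, the 4 KB size, and the create / set-memtype / allocate flow are illustrative assumptions; only the DRM_IOCTL_KGSL_GEM_* requests, the flag values, and the struct layouts come from kgsl_drm.h.

/*
 * Hypothetical userspace sketch: allocate a GEM object through the KGSL
 * DRM interface, mark it as paged memory with write-combined caching,
 * and ask for its mmap offset.  Device node path and size are assumed.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/kgsl_drm.h>

int main(void)
{
	struct drm_kgsl_gem_create create = { .size = 4096 };
	struct drm_kgsl_gem_memtype memtype;
	struct drm_kgsl_gem_alloc alloc;
	int fd, ret = 1;

	fd = open("/dev/dri/card0", O_RDWR);	/* assumed DRM node */
	if (fd < 0)
		return 1;

	/* Create the object, then pick ordinary paged, write-combined memory. */
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_CREATE, &create))
		goto out;

	memtype.handle = create.handle;
	memtype.type = DRM_KGSL_GEM_TYPE_MEM | DRM_KGSL_GEM_CACHE_WCOMBINE;
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_SETMEMTYPE, &memtype))
		goto out;

	/* Back the object with memory and fetch its mmap offset. */
	alloc.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_KGSL_GEM_ALLOC, &alloc))
		goto out;

	printf("handle %u, mmap offset 0x%llx\n", create.handle,
	       (unsigned long long)alloc.offset);
	ret = 0;
out:
	close(fd);
	return ret;
}

Mapping the buffer afterwards would use the returned offset with an ordinary mmap() on the same file descriptor, in the usual DRM fashion.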
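
Looking back at kgsl_pwrscale_trustzone.c earlier in this patch, the counter handling in tz_idle() is the subtlest part: once the GPU has sat at the top power level for more than SWITCH_OFF consecutive idle ticks, the policy stops calling into the secure world and only re-checks every SKIP_COUNTER ticks. The stand-alone mock below is an illustration only, not driver code; the kernel and SCM calls are stubbed out and only that counter logic is kept, so the behaviour can be traced on a host machine with the same SWITCH_OFF, SWITCH_OFF_RESET_TH, and SKIP_COUNTER values.

/* Host-side mock of the tz_idle() hysteresis, for illustration only. */
#include <stdio.h>

#define SWITCH_OFF		200
#define SWITCH_OFF_RESET_TH	40
#define SKIP_COUNTER		500

static unsigned int no_switch_cnt, skip_cnt;

/* Returns nonzero when the policy would actually consult TrustZone. */
static int tz_idle_tick(int at_top_level)
{
	if (at_top_level) {
		if (no_switch_cnt > SWITCH_OFF) {
			/* Throttled: only re-enable the check occasionally. */
			if (++skip_cnt > SKIP_COUNTER) {
				no_switch_cnt -= SWITCH_OFF_RESET_TH;
				skip_cnt = 0;
			}
			return 0;
		}
		no_switch_cnt++;
	} else {
		no_switch_cnt = 0;
	}
	return 1;	/* stands in for __secure_tz_entry(TZ_UPDATE_ID, idle) */
}

int main(void)
{
	int i, calls = 0;

	/* Simulate 1000 idle ticks with the GPU pinned at pwrlevel 0. */
	for (i = 0; i < 1000; i++)
		calls += tz_idle_tick(1);

	printf("TrustZone consulted on %d of 1000 ticks\n", calls);
	return 0;
}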