diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c
old mode 100644
new mode 100755
index 948af5cd..4697ef53
--- a/arch/arm/mach-msm/board-htcleo.c
+++ b/arch/arm/mach-msm/board-htcleo.c
@@ -755,7 +755,7 @@ static struct android_pmem_platform_data mdp_pmem_pdata = {
 	.start = MSM_PMEM_MDP_BASE,
 	.size = MSM_PMEM_MDP_SIZE,
 #ifdef CONFIG_MSM_KGSL
-	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,
+	.allocator_type = PMEM_ALLOCATORTYPE_ALLORNOTHING,
 #else
 	.no_allocator = 0,
 #endif
diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h
old mode 100644
new mode 100755
index daf3db03..8d633974
--- a/arch/arm/mach-msm/board-htcleo.h
+++ b/arch/arm/mach-msm/board-htcleo.h
@@ -26,6 +26,7 @@
 #define MSM_EBI1_BANK0_SIZE	0x1E7C0000 /* 488MB - 0x00040000 RAM CONSOLE */
 #endif
 
+/* Don't change that */
 #define MSM_SMI_BASE		0x00000000
 #define MSM_SMI_SIZE		0x04000000
 
@@ -42,7 +43,7 @@
 #define MSM_PMEM_MDP_SIZE	0x02000000
 
 #define MSM_PMEM_ADSP_BASE	0x3D700000
-#define MSM_PMEM_ADSP_SIZE	0x02900000
+#define MSM_PMEM_ADSP_SIZE	0x01800000
 
 #define MSM_GPU_PHYS_BASE	(MSM_PMEM_SMI_BASE + MSM_FB_SIZE)
 #define MSM_GPU_PHYS_SIZE	0x00800000
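For reference (not part of the patch), the carveout arithmetic behind this hunk: shrinking MSM_PMEM_ADSP_SIZE from 0x02900000 to 0x01800000 returns 17 MB to the memory map, and the region's old end address was exactly 0x40000000:

    /* Values copied from the hunk above; the rest is illustrative. */
    #define ADSP_BASE     0x3D700000u
    #define OLD_ADSP_SIZE 0x02900000u
    #define NEW_ADSP_SIZE 0x01800000u

    unsigned int freed_mb = (OLD_ADSP_SIZE - NEW_ADSP_SIZE) >> 20; /* = 17 */
    unsigned int old_end  = ADSP_BASE + OLD_ADSP_SIZE;             /* 0x40000000 */
    unsigned int new_end  = ADSP_BASE + NEW_ADSP_SIZE;             /* 0x3EF00000 */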
diff --git a/arch/arm/mach-msm/include/mach/ion.h b/arch/arm/mach-msm/include/mach/ion.h
new file mode 100755
index 00000000..4d12249d
--- /dev/null
+++ b/arch/arm/mach-msm/include/mach/ion.h
@@ -0,0 +1,23 @@
+/**
+ *
+ * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __MACH_ION_H_
+#define __MACH_ION_H_
+
+enum ion_memory_types {
+	ION_EBI_TYPE,
+	ION_SMI_TYPE,
+};
+
+#endif
diff --git a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c
old mode 100644
new mode 100755
index 69ce1804..b6162d89
--- a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c
+++ b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c
@@ -61,7 +61,7 @@
 #define VDEC_GET_MAJOR_VERSION(version) (((version)&MAJOR_MASK)>>16)
 #define VDEC_GET_MINOR_VERSION(version) ((version)&MINOR_MASK)
 
-
+//#define DEBUG_TRACE_VDEC
 #ifdef DEBUG_TRACE_VDEC
 #define TRACE(fmt,x...) \
 	do { pr_debug("%s:%d " fmt, __func__, __LINE__, ##x); } while (0)
@@ -69,6 +69,8 @@
 #define TRACE(fmt,x...) do { } while (0)
 #endif
 
+/* the version check will cause vdec hang up!!! */
+#define VERSION_CHECK 0
 
 static DEFINE_MUTEX(idlecount_lock);
 static int idlecount;
@@ -696,7 +698,7 @@ static long vdec_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		break;
 
 	default:
-		pr_err("%s: invalid ioctl!\n", __func__);
+		pr_err("%s: invalid ioctl! cmd= %08x\n", __func__, cmd);
 		ret = -EINVAL;
 		break;
 	}
@@ -799,8 +801,9 @@ static int vdec_open(struct inode *inode, struct file *file)
 	int i;
 	struct vdec_msg_list *l;
 	struct vdec_data *vd;
+#if VERSION_CHECK
 	struct dal_info version_info;
-
+#endif
 	pr_info("q6vdec_open()\n");
 	mutex_lock(&vdec_ref_lock);
 	if (ref_cnt >= MAX_SUPPORTED_INSTANCES) {
@@ -845,6 +848,7 @@ static int vdec_open(struct inode *inode, struct file *file)
 		ret = -EIO;
 		goto vdec_open_err_handle_list;
 	}
+#if VERSION_CHECK
 	ret = dal_call_f9(vd->vdec_handle, DAL_OP_INFO,
 			&version_info, sizeof(struct dal_info));
@@ -859,12 +863,15 @@ static int vdec_open(struct inode *inode, struct file *file)
 		pr_err("%s: driver version mismatch !\n", __func__);
 		goto vdec_open_err_handle_version;
 	}
-
+#endif
 	vd->running = 1;
 	prevent_sleep();
 	return 0;
+
+#if VERSION_CHECK
 vdec_open_err_handle_version:
 	dal_detach(vd->vdec_handle);
+#endif
 vdec_open_err_handle_list:
 	{
 		struct vdec_msg_list *l, *n;
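For reference (not part of the patch): the `#if VERSION_CHECK` guards stub out the DAL version query that hangs the decoder, while keeping the dead code compilable for later re-enabling. A minimal sketch of the same pattern, with a hypothetical helper name:

    #define VERSION_CHECK 0	/* the DAL version probe hangs the QDSP6 vdec */

    static int vdec_check_version(struct vdec_data *vd)
    {
    #if VERSION_CHECK
    	/* the real code issues dal_call_f9(vd->vdec_handle, DAL_OP_INFO, ...) */
    	return query_dal_version(vd);	/* hypothetical helper */
    #else
    	return 0;	/* probe skipped: assume the firmware is compatible */
    #endif
    }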
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
old mode 100644
new mode 100755
index f49e7164..8ef724e2
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -19,6 +19,7 @@ msm_adreno-y += \
 	adreno_drawctxt.o \
 	adreno_postmortem.o \
 	adreno_a2xx.o \
+	adreno_a3xx.o \
 	adreno.o
 
 msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o
diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h
old mode 100644
new mode 100755
index d859d61c..1c7be3e6
--- a/drivers/gpu/msm/a2xx_reg.h
+++ b/drivers/gpu/msm/a2xx_reg.h
@@ -140,24 +140,9 @@ union reg_rb_edram_info {
 	struct rb_edram_info_t f;
 };
 
-#define RBBM_READ_ERROR_UNUSED0_SIZE 2
-#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15
-#define RBBM_READ_ERROR_UNUSED1_SIZE 13
-#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1
-#define RBBM_READ_ERROR_READ_ERROR_SIZE 1
-
-struct rbbm_read_error_t {
-	unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE;
-	unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE;
-	unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE;
-	unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE;
-	unsigned int read_error:RBBM_READ_ERROR_READ_ERROR_SIZE;
-};
-
-union rbbm_read_error_u {
-	unsigned int val:32;
-	struct rbbm_read_error_t f;
-};
+#define RBBM_READ_ERROR_ADDRESS_MASK	0x0001fffc
+#define RBBM_READ_ERROR_REQUESTER	(1<<30)
+#define RBBM_READ_ERROR_ERROR		(1<<31)
 
 #define CP_RB_CNTL_RB_BUFSZ_SIZE 6
 #define CP_RB_CNTL_UNUSED0_SIZE 2
@@ -278,6 +263,7 @@ union reg_cp_rb_cntl {
 #define REG_CP_ME_CNTL		0x01F6
 #define REG_CP_ME_RAM_DATA	0x01FA
 #define REG_CP_ME_RAM_WADDR	0x01F8
+#define REG_CP_ME_RAM_RADDR	0x01F9
 #define REG_CP_ME_STATUS	0x01F7
 #define REG_CP_PFP_UCODE_ADDR	0x00C0
 #define REG_CP_PFP_UCODE_DATA	0x00C1
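For reference (not part of the patch): with the bitfield union gone, a raw REG_RBBM_READ_ERROR value is decoded with plain masks and shifts — which is also what the reworked a2xx_rbbm_intrcallback() further down in this patch does:

    unsigned int rderr;       /* raw register value from adreno_regread() */
    unsigned int dword_addr;  /* bits 16:2 hold the failing register offset */

    dword_addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2;
    if (rderr & RBBM_READ_ERROR_ERROR)
    	pr_err("read error at %04x from %s\n", dword_addr,
    	       (rderr & RBBM_READ_ERROR_REQUESTER) ? "host" : "cp");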
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
new file mode 100755
index 00000000..84c83b8c
--- /dev/null
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -0,0 +1,453 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _A300_REG_H
+#define _A300_REG_H
+
+/* Interrupt bit positions within RBBM_INT_0 */
+
+#define A3XX_INT_RBBM_GPU_IDLE 0
+#define A3XX_INT_RBBM_AHB_ERROR 1
+#define A3XX_INT_RBBM_REG_TIMEOUT 2
+#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3
+#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4
+#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5
+#define A3XX_INT_VFD_ERROR 6
+#define A3XX_INT_CP_SW_INT 7
+#define A3XX_INT_CP_T0_PACKET_IN_IB 8
+#define A3XX_INT_CP_OPCODE_ERROR 9
+#define A3XX_INT_CP_RESERVED_BIT_ERROR 10
+#define A3XX_INT_CP_HW_FAULT 11
+#define A3xx_INT_CP_DMA 12
+#define A3XX_INT_CP_IB2_INT 13
+#define A3XX_INT_CP_IB1_INT 14
+#define A3XX_INT_CP_RB_INT 15
+#define A3XX_INT_CP_REG_PROTECT_FAULT 16
+#define A3XX_INT_CP_RB_DONE_TS 17
+#define A3XX_INT_CP_VS_DONE_TS 18
+#define A3XX_INT_CP_PS_DONE_TS 19
+#define A3XX_INT_CACHE_FLUSH_TS 20
+#define A3XX_INT_CP_AHB_ERROR_HALT 21
+#define A3XX_INT_MISC_HANG_DETECT 24
+#define A3XX_INT_UCHE_OOB_ACCESS 25
+
+/* Register definitions */
+
+#define A3XX_RBBM_HW_VERSION 0x000
+#define A3XX_RBBM_HW_RELEASE 0x001
+#define A3XX_RBBM_HW_CONFIGURATION 0x002
+#define A3XX_RBBM_SW_RESET_CMD 0x018
+#define A3XX_RBBM_AHB_CTL0 0x020
+#define A3XX_RBBM_AHB_CTL1 0x021
+#define A3XX_RBBM_AHB_CMD 0x022
+#define A3XX_RBBM_AHB_ERROR_STATUS 0x027
+#define A3XX_RBBM_GPR0_CTL 0x02E
+/* This is the same register as on A2XX, just in a different place */
+#define A3XX_RBBM_STATUS 0x030
+#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50
+#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51
+#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54
+#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57
+#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A
+#define A3XX_RBBM_INT_CLEAR_CMD 0x061
+#define A3XX_RBBM_INT_0_MASK 0x063
+#define A3XX_RBBM_INT_0_STATUS 0x064
+#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_RBBM_CTL 0x100
+#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
+#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
+/* The following two are the same as on A2XX, just in a different place */
+#define A3XX_CP_PFP_UCODE_ADDR 0x1C9
+#define A3XX_CP_PFP_UCODE_DATA 0x1CA
+#define A3XX_CP_HW_FAULT 0x45C
+#define A3XX_CP_AHB_FAULT 0x54D
+#define A3XX_CP_PROTECT_CTRL 0x45E
+#define A3XX_CP_PROTECT_STATUS 0x45F
+#define A3XX_CP_PROTECT_REG_0 0x460
+#define A3XX_CP_PROTECT_REG_1 0x461
+#define A3XX_CP_PROTECT_REG_2 0x462
+#define A3XX_CP_PROTECT_REG_3 0x463
+#define A3XX_CP_PROTECT_REG_4 0x464
+#define A3XX_CP_PROTECT_REG_5 0x465
+#define A3XX_CP_PROTECT_REG_6 0x466
+#define A3XX_CP_PROTECT_REG_7 0x467
+#define A3XX_CP_PROTECT_REG_8 0x468
+#define A3XX_CP_PROTECT_REG_9 0x469
+#define A3XX_CP_PROTECT_REG_A 0x46A
+#define A3XX_CP_PROTECT_REG_B 0x46B
+#define A3XX_CP_PROTECT_REG_C 0x46C
+#define A3XX_CP_PROTECT_REG_D 0x46D
+#define A3XX_CP_PROTECT_REG_E 0x46E
+#define A3XX_CP_PROTECT_REG_F 0x46F
+#define A3XX_CP_SCRATCH_REG2 0x57A
+#define A3XX_CP_SCRATCH_REG3 0x57B
+#define A3XX_VSC_BIN_SIZE 0xC01
+#define A3XX_VSC_SIZE_ADDRESS 0xC02
+#define A3XX_VSC_PIPE_CONFIG_0 0xC06
+#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07
+#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08
+#define A3XX_VSC_PIPE_CONFIG_1 0xC09
+#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A
+#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B
+#define A3XX_VSC_PIPE_CONFIG_2 0xC0C
+#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D
+#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E
+#define A3XX_VSC_PIPE_CONFIG_3 0xC0F
+#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10
+#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11
+#define A3XX_VSC_PIPE_CONFIG_4 0xC12
+#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13
+#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14
+#define A3XX_VSC_PIPE_CONFIG_5 0xC15
+#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16
+#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17
+#define A3XX_VSC_PIPE_CONFIG_6 0xC18
+#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19
+#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A
+#define A3XX_VSC_PIPE_CONFIG_7 0xC1B
+#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
+#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
+#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
+#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
+#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
+#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3
+#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4
+#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5
+#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6
+#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7
+#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8
+#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9
+#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA
+#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB
+#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC
+#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD
+#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE
+#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF
+#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0
+#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1
+#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2
+#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3
+#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4
+#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5
+#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
+#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
+#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
+#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
+#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C
+#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D
+#define A3XX_GRAS_SU_POINT_MINMAX 0x2068
+#define A3XX_GRAS_SU_POINT_SIZE 0x2069
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D
+#define A3XX_GRAS_SU_MODE_CONTROL 0x2070
+#define A3XX_GRAS_SC_CONTROL 0x2072
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A
+#define A3XX_RB_MODE_CONTROL 0x20C0
+#define A3XX_RB_RENDER_CONTROL 0x20C1
+#define A3XX_RB_MSAA_CONTROL 0x20C2
+#define A3XX_RB_MRT_CONTROL0 0x20C4
+#define A3XX_RB_MRT_BUF_INFO0 0x20C5
+#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7
+#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB
+#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF
+#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3
+#define A3XX_RB_BLEND_RED 0x20E4
+#define A3XX_RB_COPY_CONTROL 0x20EC
+#define A3XX_RB_COPY_DEST_INFO 0x20EF
+#define A3XX_RB_DEPTH_CONTROL 0x2100
+#define A3XX_RB_STENCIL_CONTROL 0x2104
+#define A3XX_PC_VSTREAM_CONTROL 0x21E4
+#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA
+#define A3XX_PC_PRIM_VTX_CNTL 0x21EC
+#define A3XX_PC_RESTART_INDEX 0x21ED
+#define A3XX_HLSQ_CONTROL_0_REG 0x2200
+#define A3XX_HLSQ_VS_CONTROL_REG 0x2204
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207
+#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A
+#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C
+#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211
+#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212
+#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214
+#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217
+#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
+#define A3XX_VFD_CONTROL_0 0x2240
+#define A3XX_VFD_INDEX_MIN 0x2242
+#define A3XX_VFD_FETCH_INSTR_0_0 0x2246
+#define A3XX_VFD_FETCH_INSTR_0_4 0x224E
+#define A3XX_VFD_DECODE_INSTR_0 0x2266
+#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E
+#define A3XX_VPC_ATTR 0x2280
+#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B
+#define A3XX_SP_SP_CTRL_REG 0x22C0
+#define A3XX_SP_VS_CTRL_REG0 0x22C4
+#define A3XX_SP_VS_CTRL_REG1 0x22C5
+#define A3XX_SP_VS_PARAM_REG 0x22C6
+#define A3XX_SP_VS_OUT_REG_7 0x22CE
+#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
+#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
+#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
+#define A3XX_SP_VS_LENGTH_REG 0x22DF
+#define A3XX_SP_FS_CTRL_REG0 0x22E0
+#define A3XX_SP_FS_CTRL_REG1 0x22E1
+#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
+#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
+#define A3XX_SP_FS_OUTPUT_REG 0x22EC
+#define A3XX_SP_FS_MRT_REG_0 0x22F0
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7
+#define A3XX_SP_FS_LENGTH_REG 0x22FF
+#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340
+#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342
+#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343
+#define A3XX_VBIF_FIXED_SORT_EN 0x300C
+#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D
+#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E
+
+/* Bit flags for RBBM_CTL */
+#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (1 << 17)
+
+/* Various flags used by the context switch code */
+
+#define SP_MULTI 0
+#define SP_BUFFER_MODE 1
+#define SP_TWO_VTX_QUADS 0
+#define SP_PIXEL_BASED 0
+#define SP_R8G8B8A8_UNORM 8
+#define SP_FOUR_PIX_QUADS 1
+
+#define HLSQ_DIRECT 0
+#define HLSQ_BLOCK_ID_SP_VS 4
+#define HLSQ_SP_VS_INSTR 0
+#define HLSQ_SP_FS_INSTR 0
+#define HLSQ_BLOCK_ID_SP_FS 6
+#define HLSQ_TWO_PIX_QUADS 0
+#define HLSQ_TWO_VTX_QUADS 0
+#define HLSQ_BLOCK_ID_TP_TEX 2
+#define HLSQ_TP_TEX_SAMPLERS 0
+#define HLSQ_TP_TEX_MEMOBJ 1
+#define HLSQ_BLOCK_ID_TP_MIPMAP 3
+#define HLSQ_TP_MIPMAP_BASE 1
+#define HLSQ_FOUR_PIX_QUADS 1
+
+#define RB_FACTOR_ONE 1
+#define RB_BLEND_OP_ADD 0
+#define RB_FACTOR_ZERO 0
+#define RB_DITHER_DISABLE 0
+#define RB_DITHER_ALWAYS 1
+#define RB_FRAG_NEVER 0
+#define RB_ENDIAN_NONE 0
+#define RB_R8G8B8A8_UNORM 8
+#define RB_RESOLVE_PASS 2
+#define RB_CLEAR_MODE_RESOLVE 1
+#define RB_TILINGMODE_LINEAR 0
+#define RB_REF_NEVER 0
+#define RB_STENCIL_KEEP 0
+#define RB_RENDERING_PASS 0
+#define RB_TILINGMODE_32X32 2
+
+#define PC_DRAW_TRIANGLES 2
+#define PC_DI_PT_RECTLIST 8
+#define PC_DI_SRC_SEL_AUTO_INDEX 2
+#define PC_DI_INDEX_SIZE_16_BIT 0
+#define PC_DI_IGNORE_VISIBILITY 0
+#define PC_DI_PT_TRILIST 4
+#define PC_DI_SRC_SEL_IMMEDIATE 1
+#define PC_DI_INDEX_SIZE_32_BIT 1
+
+#define UCHE_ENTIRE_CACHE 1
+#define UCHE_OP_INVALIDATE 1
+
+/*
+ * The following are bit field shifts within some of the registers defined
+ * above.  These are used in the context switch code in conjunction with the
+ * _SET macro
+ */
+
+#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16
+#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12
+#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21
+#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19
+#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20
+#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17
+#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0
+#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0
+#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0
+#define GRAS_SC_CONTROL_RASTER_MODE 12
+#define GRAS_SC_CONTROL_RENDER_MODE 4
+#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0
+#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16
+#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0
+#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16
+#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16
+#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0
+#define HLSQ_CTRL0REG_CHUNKDISABLE 26
+#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27
+#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6
+#define HLSQ_CTRL0REG_FSTHREADSIZE 4
+#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28
+#define HLSQ_CTRL0REG_RESERVED2 10
+#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29
+#define HLSQ_CTRL0REG_SPSHADERRESTART 9
+#define HLSQ_CTRL0REG_TPFULLUPDATE 30
+#define HLSQ_CTRL1REG_RESERVED1 9
+#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8
+#define HLSQ_CTRL1REG_VSTHREADSIZE 6
+#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26
+#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0
+#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12
+#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24
+#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24
+#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8
+#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5
+#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25
+#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0
+#define PC_DRAW_INITIATOR_PRIM_TYPE 0
+#define PC_DRAW_INITIATOR_SOURCE_SELECT 6
+#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9
+#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B
+#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D
+#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E
+#define RB_COPYCONTROL_COPY_GMEM_BASE 14
+#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4
+#define RB_COPYDESTBASE_COPY_DEST_BASE 4
+#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14
+#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18
+#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2
+#define RB_COPYDESTINFO_COPY_DEST_TILE 0
+#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0
+#define RB_DEPTHCONTROL_Z_TEST_FUNC 4
+#define RB_MODECONTROL_RENDER_MODE 8
+#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15
+#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16
+#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21
+#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24
+#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16
+#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29
+#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5
+#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8
+#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0
+#define RB_MRTBUFBASE_COLOR_BUF_BASE 4
+#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17
+#define RB_MRTBUFINFO_COLOR_FORMAT 0
+#define RB_MRTBUFINFO_COLOR_TILE_MODE 6
+#define RB_MRTCONTROL_COMPONENT_ENABLE 24
+#define RB_MRTCONTROL_DITHER_MODE 12
+#define RB_MRTCONTROL_READ_DEST_ENABLE 3
+#define RB_MRTCONTROL_ROP_CODE 8
+#define RB_MSAACONTROL_MSAA_DISABLE 10
+#define RB_MSAACONTROL_SAMPLE_MASK 16
+#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24
+#define RB_RENDERCONTROL_BIN_WIDTH 4
+#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12
+#define RB_STENCILCONTROL_STENCIL_FAIL 11
+#define RB_STENCILCONTROL_STENCIL_FAIL_BF 23
+#define RB_STENCILCONTROL_STENCIL_FUNC 8
+#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20
+#define RB_STENCILCONTROL_STENCIL_ZFAIL 17
+#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29
+#define RB_STENCILCONTROL_STENCIL_ZPASS 14
+#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26
+#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10
+#define SP_FSCTRLREG0_FSICACHEINVALID 2
+#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18
+#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1
+#define SP_FSCTRLREG0_FSLENGTH 24
+#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21
+#define SP_FSCTRLREG0_FSTHREADMODE 0
+#define SP_FSCTRLREG0_FSTHREADSIZE 20
+#define SP_FSCTRLREG0_PIXLODENABLE 22
+#define SP_FSCTRLREG1_FSCONSTLENGTH 0
+#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20
+#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24
+#define SP_FSMRTREG_REGID 0
+#define SP_FSOUTREG_PAD0 2
+#define SP_IMAGEOUTPUTREG_MRTFORMAT 0
+#define SP_IMAGEOUTPUTREG_PAD0 6
+#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16
+#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25
+#define SP_SHADERLENGTH_LEN 0
+#define SP_SPCTRLREG_CONSTMODE 18
+#define SP_SPCTRLREG_SLEEPMODE 20
+#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10
+#define SP_VSCTRLREG0_VSICACHEINVALID 2
+#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1
+#define SP_VSCTRLREG0_VSLENGTH 24
+#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21
+#define SP_VSCTRLREG0_VSTHREADMODE 0
+#define SP_VSCTRLREG0_VSTHREADSIZE 20
+#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24
+#define SP_VSOUTREG_COMPMASK0 9
+#define SP_VSPARAMREG_POSREGID 0
+#define SP_VSPARAMREG_PSIZEREGID 8
+#define SP_VSPARAMREG_TOTALVSOUTVAR 20
+#define SP_VSVPCDSTREG_OUTLOC0 0
+#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16
+#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8
+#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0
+#define UCHE_INVALIDATE1REG_OPCODE 0x1C
+#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F
+#define VFD_BASEADDR_BASEADDR 0
+#define VFD_CTRLREG0_PACKETSIZE 18
+#define VFD_CTRLREG0_STRMDECINSTRCNT 22
+#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27
+#define VFD_CTRLREG0_TOTALATTRTOVS 0
+#define VFD_CTRLREG1_MAXSTORAGE 0
+#define VFD_CTRLREG1_REGID4INST 24
+#define VFD_CTRLREG1_REGID4VTX 16
+#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4
+#define VFD_DECODEINSTRUCTIONS_FORMAT 6
+#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29
+#define VFD_DECODEINSTRUCTIONS_REGID 12
+#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24
+#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30
+#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0
+#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7
+#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0
+#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18
+#define VFD_FETCHINSTRUCTIONS_STEPRATE 24
+#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17
+#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8
+#define VFD_THREADINGTHRESHOLD_RESERVED6 4
+#define VPC_VPCATTR_LMSIZE 28
+#define VPC_VPCATTR_THRHDASSIGN 12
+#define VPC_VPCATTR_TOTALATTR 0
+#define VPC_VPCPACK_NUMFPNONPOSVAR 8
+#define VPC_VPCPACK_NUMNONPOSVSVAR 16
+#define VPC_VPCVARPSREPLMODE_COMPONENT08 0
+#define VPC_VPCVARPSREPLMODE_COMPONENT09 2
+#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4
+#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6
+#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8
+#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10
+#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12
+#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14
+#define VPC_VPCVARPSREPLMODE_COMPONENT10 16
+#define VPC_VPCVARPSREPLMODE_COMPONENT11 18
+#define VPC_VPCVARPSREPLMODE_COMPONENT12 20
+#define VPC_VPCVARPSREPLMODE_COMPONENT13 22
+#define VPC_VPCVARPSREPLMODE_COMPONENT14 24
+#define VPC_VPCVARPSREPLMODE_COMPONENT15 26
+#define VPC_VPCVARPSREPLMODE_COMPONENT16 28
+#define VPC_VPCVARPSREPLMODE_COMPONENT17 30
+
+#endif
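For reference (not part of the patch): the values at the end of a3xx_reg.h are bit positions, not masks. The context-switch code packs them with a _SET macro; its assumed shape, and an illustrative use with the constants defined above:

    #define _SET(_shift, _val) ((_val) << (_shift))	/* assumed definition */

    /* e.g. an RB_MODE_CONTROL word for a resolve pass */
    unsigned int rb_mode = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
    		       _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);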
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
old mode 100644
new mode 100755
index 61f14a4a..4020fefd
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -72,6 +72,7 @@
 	| (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT))
 
 static const struct kgsl_functable adreno_functable;
+unsigned int kgsl_cff_dump_enable = 0;
 
 static struct adreno_device device_3d0 = {
 	.dev = {
@@ -120,8 +121,11 @@ static struct adreno_device device_3d0 = {
 	},
 	.pfp_fw = NULL,
 	.pm4_fw = NULL,
+	.wait_timeout = 10000, /* in milliseconds */
+	.ib_check_level = 0,
 };
+
 /*
  * This is the master list of all GPU cores that are supported by this
  * driver.
@@ -135,50 +139,47 @@ static const struct {
 	const char *pm4fw;
 	const char *pfpfw;
 	struct adreno_gpudev *gpudev;
+	unsigned int istore_size;
+	unsigned int pix_shader_start;
+	unsigned int instruction_size; /* Size of an instruction in dwords */
+	unsigned int gmem_size; /* size of gmem for gpu */
 } adreno_gpulist[] = {
 	{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
-		"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev },
+		"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
+		512, 384, 3, SZ_256K },
+	{ ADRENO_REV_A203, 0, 1, 1, ANY_ID,
+		"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
+		512, 384, 3, SZ_256K },
 	{ ADRENO_REV_A205, 0, 1, 0, ANY_ID,
-		"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev },
+		"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
+		512, 384, 3, SZ_256K },
 	{ ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID,
-		"leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev },
+		"leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev,
+		512, 384, 3, SZ_512K },
 	/*
 	 * patchlevel 5 (8960v2) needs special pm4 firmware to work around
 	 * a hardware problem.
 	 */
 	{ ADRENO_REV_A225, 2, 2, 0, 5,
-		"a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev },
+		"a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
+		1536, 768, 3, SZ_512K },
+	{ ADRENO_REV_A225, 2, 2, 0, 6,
+		"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
+		1536, 768, 3, SZ_512K },
 	{ ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID,
-		"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev },
+		"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
+		1536, 768, 3, SZ_512K },
+	/* A3XX doesn't use the pix_shader_start */
+	{ ADRENO_REV_A305, 3, 1, ANY_ID, ANY_ID,
+		"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
+		512, 0, 2, SZ_256K },
+	/* A3XX doesn't use the pix_shader_start */
+	{ ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID,
+		"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
+		512, 0, 2, SZ_512K },
 };
 
-static void adreno_gmeminit(struct adreno_device *adreno_dev)
-{
-	struct kgsl_device *device = &adreno_dev->dev;
-	union reg_rb_edram_info rb_edram_info;
-	unsigned int gmem_size;
-	unsigned int edram_value = 0;
-
-	/* make sure edram range is aligned to size */
-	BUG_ON(adreno_dev->gmemspace.gpu_base &
-		(adreno_dev->gmemspace.sizebytes - 1));
-
-	/* get edram_size value equivalent */
-	gmem_size = (adreno_dev->gmemspace.sizebytes >> 14);
-	while (gmem_size >>= 1)
-		edram_value++;
-
-	rb_edram_info.val = 0;
-
-	rb_edram_info.f.edram_size = edram_value;
-	rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */
-
-	/* must be aligned to size */
-	rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14);
-
-	adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val);
-}
-
 static irqreturn_t adreno_isr(int irq, void *data)
 {
 	irqreturn_t result;
@@ -268,10 +269,13 @@ static void adreno_setstate(struct kgsl_device *device,
 	int sizedwords = 0;
 	unsigned int mh_mmu_invalidate = 0x00000003; /* invalidate all and tc */
 
-	/* If possible, then set the state via the command stream to avoid
-	   a CPU idle.  Otherwise, use the default setstate which uses register
-	   writes */
-	if (adreno_dev->drawctxt_active) {
+	/*
+	 * If possible, then set the state via the command stream to avoid
+	 * a CPU idle.  Otherwise, use the default setstate which uses register
+	 * writes.  For CFF dump we must idle and use the registers so that it
+	 * is easier to filter out the mmu accesses from the dump.
+	 */
+	if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) {
 		if (flags & KGSL_MMUFLAGS_PTUPDATE) {
 			/* wait for graphics pipe to be idle */
 			*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
@@ -419,6 +423,10 @@ adreno_identify_gpu(struct adreno_device *adreno_dev)
 	adreno_dev->gpudev = adreno_gpulist[i].gpudev;
 	adreno_dev->pfp_fwfile = adreno_gpulist[i].pfpfw;
 	adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw;
+	adreno_dev->istore_size = adreno_gpulist[i].istore_size;
+	adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start;
+	adreno_dev->instruction_size = adreno_gpulist[i].instruction_size;
+	adreno_dev->gmemspace.sizebytes = adreno_gpulist[i].gmem_size;
 }
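For reference (not part of the patch): adreno_identify_gpu() walks this table and copies the matching entry's fields into the adreno_device, with ANY_ID acting as a wildcard for the chip-id components. A sketch of that match — the entry type name here is hypothetical:

    static bool gpulist_entry_matches(unsigned int core, unsigned int major,
    				  unsigned int minor, unsigned int patchid,
    				  const struct gpulist_entry *e) /* hypothetical */
    {
    	return core == e->core &&
    	       (e->major == ANY_ID || e->major == major) &&
    	       (e->minor == ANY_ID || e->minor == minor) &&
    	       (e->patchid == ANY_ID || e->patchid == patchid);
    }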
 static int __devinit
@@ -432,8 +440,6 @@ adreno_probe(struct platform_device *pdev)
 	adreno_dev = ADRENO_DEVICE(device);
 	device->parentdev = &pdev->dev;
 
-	adreno_dev->wait_timeout = 10000; /* default value in milliseconds */
-
 	init_completion(&device->recovery_gate);
 
 	status = adreno_ringbuffer_init(device);
@@ -480,7 +486,6 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram)
 {
 	int status = -EINVAL;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	int init_reftimestamp = 0x7fffffff;
 
 	device->state = KGSL_STATE_INIT;
 	device->requested_state = KGSL_STATE_NONE;
@@ -508,80 +513,22 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram)
 
 	kgsl_mh_start(device);
 
-	if (kgsl_mmu_start(device))
+	status = kgsl_mmu_start(device);
+	if (status)
 		goto error_clk_off;
 
-	/* We need to make sure all blocks are powered up and clocked before
-	 * issuing a soft reset.  The overrides will then be turned off (set to 0)
-	 */
-	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe);
-	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff);
-
-	/* Only reset CP block if all blocks have previously been reset */
-	if (!(device->flags & KGSL_FLAGS_SOFT_RESET) ||
-		!adreno_is_a22x(adreno_dev)) {
-		adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0xFFFFFFFF);
-		device->flags |= KGSL_FLAGS_SOFT_RESET;
-	} else
-		adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000001);
-
-	/* The core is in an indeterminate state until the reset completes
-	 * after 30ms.
-	 */
-	msleep(30);
-
-	adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000);
-
-	adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442);
-
-	if (adreno_is_a225(adreno_dev)) {
-		/* Enable large instruction store for A225 */
-		adreno_regwrite(device, REG_SQ_FLOW_CONTROL, 0x18000000);
-	}
-
-	adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000);
-	adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000);
-
-	if (cpu_is_msm8960() || cpu_is_msm8930())
-		adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200);
-	else
-		adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0);
-
-	if (!adreno_is_a22x(adreno_dev))
-		adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0);
-	else
-		adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80);
-
-	kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size);
-
-	kgsl_sharedmem_writel(&device->memstore,
-		KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts),
-		init_reftimestamp);
-
-	adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000);
-
-	/* Make sure interrupts are disabled */
-
-	adreno_regwrite(device, REG_RBBM_INT_CNTL, 0);
-	adreno_regwrite(device, REG_CP_INT_CNTL, 0);
-	adreno_regwrite(device, REG_SQ_INT_CNTL, 0);
-
-	if (adreno_is_a22x(adreno_dev))
-		adreno_dev->gmemspace.sizebytes = SZ_512K;
-	else
-		adreno_dev->gmemspace.sizebytes = SZ_256K;
-	adreno_gmeminit(adreno_dev);
+	/* Start the GPU */
+	adreno_dev->gpudev->start(adreno_dev);
 
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+	device->ftbl->irqctrl(device, 1);
 
 	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram);
-	if (status != 0)
-		goto error_irq_off;
-
+	if (status == 0) {
 		mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
-	return status;
+		return 0;
+	}
 
-error_irq_off:
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 	kgsl_mmu_stop(device);
 error_clk_off:
@@ -624,6 +571,8 @@ adreno_recover_hang(struct kgsl_device *device)
 	unsigned int soptimestamp;
 	unsigned int eoptimestamp;
 	struct adreno_context *drawctxt;
+	struct kgsl_context *context;
+	int next = 0;
 
 	KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n");
 	rb_buffer = vmalloc(rb->buffer_desc.size);
@@ -692,6 +641,24 @@ adreno_recover_hang(struct kgsl_device *device)
 
 	drawctxt->flags |= CTXT_FLAGS_GPU_HANG;
 
+	/*
+	 * Set the reset status of all contexts to
+	 * INNOCENT_CONTEXT_RESET_EXT except for the bad context
+	 * since that's the guilty party
+	 */
+	while ((context = idr_get_next(&device->context_idr, &next))) {
+		if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
+			context->reset_status) {
+			if (context->devctxt != drawctxt)
+				context->reset_status =
+				KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
+			else
+				context->reset_status =
+				KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
+		}
+		next = next + 1;
+	}
+
 	/* Restore valid commands in ringbuffer */
 	adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents);
 	rb->timestamp = timestamp;
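For reference (not part of the patch): the walk above relies on idr_get_next() returning the next live entry with an id at or after *id, so the manual increment is what advances the scan. The bare pattern:

    struct kgsl_context *ctx;
    int id = 0;

    while ((ctx = idr_get_next(&device->context_idr, &id)) != NULL) {
    	/* ... inspect or mark ctx ... */
    	id++;	/* step past the id we just visited */
    }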
@@ -834,6 +801,12 @@ static int adreno_getproperty(struct kgsl_device *device,
 	return status;
 }
 
+static inline void adreno_poke(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr);
+}
+
 /* Caller must hold the device mutex. */
 int adreno_idle(struct kgsl_device *device, unsigned int timeout)
 {
@@ -842,16 +815,32 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout)
 	unsigned int rbbm_status;
 	unsigned long wait_timeout =
 		msecs_to_jiffies(adreno_dev->wait_timeout);
-	unsigned long wait_time = jiffies + wait_timeout;
+	unsigned long wait_time;
+	unsigned long wait_time_part;
+	unsigned int msecs;
+	unsigned int msecs_first;
+	unsigned int msecs_part;
 
-	kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2,
+	kgsl_cffdump_regpoll(device->id,
+		adreno_dev->gpudev->reg_rbbm_status << 2,
 		0x00000000, 0x80000000);
 
 	/* first, wait until the CP has consumed all the commands in
 	 * the ring buffer
 	 */
retry:
 	if (rb->flags & KGSL_FLAGS_STARTED) {
+		msecs = adreno_dev->wait_timeout;
+		msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100;
+		msecs_part = (msecs - msecs_first + 3) / 4;
+		wait_time = jiffies + wait_timeout;
+		wait_time_part = jiffies + msecs_to_jiffies(msecs_first);
+		adreno_poke(device);
 		do {
+			if (time_after(jiffies, wait_time_part)) {
+				adreno_poke(device);
+				wait_time_part = jiffies +
+					msecs_to_jiffies(msecs_part);
+			}
 			GSL_RB_GET_READPTR(rb, &rb->rptr);
 			if (time_after(jiffies, wait_time)) {
 				KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n",
@@ -864,7 +853,8 @@ retry:
 	/* now, wait for the GPU to finish its operations */
 	wait_time = jiffies + wait_timeout;
 	while (time_before(jiffies, wait_time)) {
-		adreno_regread(device, REG_RBBM_STATUS, &rbbm_status);
+		adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status,
+			&rbbm_status);
 		if (rbbm_status == 0x110)
 			return 0;
 	}
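For reference (not part of the patch), the poke-interval arithmetic above, worked through with the 10-second default wait_timeout: the WPTR is first rewritten after ~100 ms, then roughly every 2.5 s until the deadline.

    unsigned int msecs = 10000;
    unsigned int msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; /* 100 */
    unsigned int msecs_part  = (msecs - msecs_first + 3) / 4;            /* 2475 */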
@@ -918,58 +908,70 @@ static int adreno_suspend_context(struct kgsl_device *device)
 	return status;
 }
 
-uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device,
-	unsigned int pt_base, unsigned int gpuaddr, unsigned int *size)
+struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
+						unsigned int pt_base,
+						unsigned int gpuaddr,
+						unsigned int size)
 {
-	uint8_t *result = NULL;
+	struct kgsl_memdesc *result = NULL;
 	struct kgsl_mem_entry *entry;
-	struct kgsl_process_private *priv;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer;
+	struct kgsl_context *context;
+	int next = 0;
 
-	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr)) {
-		return kgsl_gpuaddr_to_vaddr(&ringbuffer->buffer_desc,
-					gpuaddr, size);
-	}
+	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size))
+		return &ringbuffer->buffer_desc;
 
-	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr)) {
-		return kgsl_gpuaddr_to_vaddr(&ringbuffer->memptrs_desc,
-					gpuaddr, size);
-	}
+	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr, size))
+		return &ringbuffer->memptrs_desc;
 
-	if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr)) {
-		return kgsl_gpuaddr_to_vaddr(&device->memstore,
-					gpuaddr, size);
-	}
+	if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size))
+		return &device->memstore;
 
-	mutex_lock(&kgsl_driver.process_mutex);
-	list_for_each_entry(priv, &kgsl_driver.process_list, list) {
-		if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base))
-			continue;
-		spin_lock(&priv->mem_lock);
-		entry = kgsl_sharedmem_find_region(priv, gpuaddr,
-						sizeof(unsigned int));
-		if (entry) {
-			result = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
-							gpuaddr, size);
-			spin_unlock(&priv->mem_lock);
-			mutex_unlock(&kgsl_driver.process_mutex);
-			return result;
-		}
-		spin_unlock(&priv->mem_lock);
-	}
-	mutex_unlock(&kgsl_driver.process_mutex);
+	entry = kgsl_get_mem_entry(pt_base, gpuaddr, size);
 
-	BUG_ON(!mutex_is_locked(&device->mutex));
-	list_for_each_entry(entry, &device->memqueue, list) {
-		if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr)) {
-			result = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
-							gpuaddr, size);
+	if (entry)
+		return &entry->memdesc;
+
+	while (1) {
+		struct adreno_context *adreno_context = NULL;
+		context = idr_get_next(&device->context_idr, &next);
+		if (context == NULL)
 			break;
+
+		adreno_context = (struct adreno_context *)context->devctxt;
+
+		if (kgsl_mmu_pt_equal(adreno_context->pagetable, pt_base)) {
+			struct kgsl_memdesc *desc;
+
+			desc = &adreno_context->gpustate;
+			if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) {
+				result = desc;
+				return result;
+			}
+
+			desc = &adreno_context->context_gmem_shadow.gmemshadow;
+			if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) {
+				result = desc;
+				return result;
+			}
 		}
+		next = next + 1;
 	}
 
-	return result;
+	return NULL;
+
+}
+
+uint8_t *adreno_convertaddr(struct kgsl_device *device, unsigned int pt_base,
+			    unsigned int gpuaddr, unsigned int size)
+{
+	struct kgsl_memdesc *memdesc;
+
+	memdesc = adreno_find_region(device, pt_base, gpuaddr, size);
+
+	return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL;
 }
 
 void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
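For reference (not part of the patch): resolving a GPU address to a kernel pointer is now a two-step lookup — find the backing memdesc, then map it — and the caller must pass the size it intends to read so the bounds check holds. Illustrative use:

    uint8_t *host = adreno_convertaddr(device, pt_base, gpuaddr,
    				   sizeof(unsigned int));
    if (host == NULL)
    	/* gpuaddr is not in the ringbuffer, memstore, a mem entry,
    	 * or any context's shadow buffers */
    	return -EINVAL;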
@@ -1047,7 +1049,8 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device,
 			 * get an interrupt */
 			cmds[0] = cp_type3_packet(CP_NOP, 1);
 			cmds[1] = 0;
-			adreno_ringbuffer_issuecmds(device, 0, &cmds[0], 2);
+			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
+				&cmds[0], 2);
 		}
 		mutex_unlock(&device->mutex);
 	}
@@ -1078,8 +1081,12 @@ static int adreno_waittimestamp(struct kgsl_device *device,
 {
 	long status = 0;
 	uint io = 1;
+	static uint io_cnt;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int retries;
+	unsigned int msecs_first;
+	unsigned int msecs_part;
 
 	/* Don't wait forever, set a max value for now */
 	if (msecs == -1)
@@ -1092,47 +1099,65 @@ static int adreno_waittimestamp(struct kgsl_device *device,
 		status = -EINVAL;
 		goto done;
 	}
-	if (!kgsl_check_timestamp(device, timestamp)) {
-		if (pwr->active_pwrlevel) {
-			int low_pwrlevel = pwr->num_pwrlevels -
-				KGSL_PWRLEVEL_LOW_OFFSET;
-			if (pwr->active_pwrlevel == low_pwrlevel)
-				io = 0;
+
+	/* Keep the first timeout as 100msecs before rewriting
+	 * the WPTR. Less visible impact if the WPTR has not
+	 * been updated properly.
+	 */
+	msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100;
+	msecs_part = (msecs - msecs_first + 3) / 4;
+	for (retries = 0; retries < 5; retries++) {
+		if (kgsl_check_timestamp(device, timestamp)) {
+			/* if the timestamp happens while we're not
+			 * waiting, there's a chance that an interrupt
+			 * will not be generated and thus the timestamp
+			 * work needs to be queued.
+			 */
+			queue_work(device->work_queue, &device->ts_expired_ws);
+			status = 0;
+			goto done;
 		}
+		adreno_poke(device);
+// the QSD8X50 doesn't support io_fraction ?? // SecureCRT 2012-06-20
+//		io_cnt = (io_cnt + 1) % 100;
+//		if (io_cnt <
+//		    pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
+//			io = 0;
 		mutex_unlock(&device->mutex);
 
-		/* We need to make sure that the process is placed in wait-q
-		 * before its condition is called */
+		/* We need to make sure that the process is
+		 * placed in wait-q before its condition is called
+		 */
 		status = kgsl_wait_event_interruptible_timeout(
 				device->wait_queue,
 				kgsl_check_interrupt_timestamp(device,
 					timestamp),
-				msecs_to_jiffies(msecs), io);
+				msecs_to_jiffies(retries ?
+					msecs_part : msecs_first), io);
 
 		mutex_lock(&device->mutex);
 
-		if (status > 0)
-			status = 0;
-		else if (status == 0) {
-			if (!kgsl_check_timestamp(device, timestamp)) {
+		if (status > 0) {
+			/* completed before the wait finished */
+			status = 0;
+			goto done;
+		} else if (status < 0) {
+			/* an error occurred */
+			goto done;
+		}
+		/* this wait timed out */
+	}
 	status = -ETIMEDOUT;
 	KGSL_DRV_ERR(device,
-		"Device hang detected while waiting "
-		"for timestamp: %x, last "
-		"submitted(rb->timestamp): %x, wptr: "
-		"%x\n", timestamp,
-		adreno_dev->ringbuffer.timestamp,
+		"Device hang detected while waiting for timestamp: %x, "
+		"last submitted(rb->timestamp): %x, wptr: %x\n",
+		timestamp, adreno_dev->ringbuffer.timestamp,
 		adreno_dev->ringbuffer.wptr);
 	if (!adreno_dump_and_recover(device)) {
 		/* wait for idle after recovery as the
 		 * timestamp that this process wanted
 		 * to wait on may be invalid */
-		if (!adreno_idle(device,
-			KGSL_TIMEOUT_DEFAULT))
-			status = 0;
-		}
-	}
-	}
+		if (!adreno_idle(device, KGSL_TIMEOUT_DEFAULT))
+			status = 0;
 	}
-
 done:
 	return (int)status;
 }
@@ -1179,7 +1204,7 @@ static long adreno_ioctl(struct kgsl_device_private *dev_priv,
 	default:
 		KGSL_DRV_INFO(dev_priv->device,
 			"invalid ioctl code %08x\n", cmd);
-		result = -EINVAL;
+		result = -ENOIOCTLCMD;
 		break;
 	}
 	return result;
@@ -1195,44 +1220,29 @@ static inline s64 adreno_ticks_to_us(u32 ticks, u32 gpu_freq)
 static void adreno_power_stats(struct kgsl_device *device,
 			struct kgsl_power_stats *stats)
 {
-	unsigned int reg;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	unsigned int cycles;
+
+	/* Get the busy cycles counted since the counter was last reset */
+	/* Calling this function also resets and restarts the counter */
+
+	cycles = adreno_dev->gpudev->busy_cycles(adreno_dev);
 
 	/* In order to calculate idle you have to have run the algorithm *
 	 * at least once to get a start time. */
 	if (pwr->time != 0) {
-		s64 tmp;
-		/* Stop the performance moniter and read the current *
-		 * busy cycles. */
-		adreno_regwrite(device,
-			REG_CP_PERFMON_CNTL,
-			REG_PERF_MODE_CNT |
-			REG_PERF_STATE_FREEZE);
-		adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &reg);
-		tmp = ktime_to_us(ktime_get());
+		s64 tmp = ktime_to_us(ktime_get());
 		stats->total_time = tmp - pwr->time;
 		pwr->time = tmp;
-		stats->busy_time = adreno_ticks_to_us(reg, device->pwrctrl.
+		stats->busy_time = adreno_ticks_to_us(cycles, device->pwrctrl.
 				pwrlevels[device->pwrctrl.active_pwrlevel].
 				gpu_freq);
-
-		adreno_regwrite(device,
-			REG_CP_PERFMON_CNTL,
-			REG_PERF_MODE_CNT |
-			REG_PERF_STATE_RESET);
 	} else {
 		stats->total_time = 0;
 		stats->busy_time = 0;
 		pwr->time = ktime_to_us(ktime_get());
 	}
-
-	/* re-enable the performance moniters */
-	adreno_regread(device, REG_RBBM_PM_OVERRIDE2, &reg);
-	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40));
-	adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1);
-	adreno_regwrite(device,
-		REG_CP_PERFMON_CNTL,
-		REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE);
 }
 
 void adreno_irqctrl(struct kgsl_device *device, int state)
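For reference (not part of the patch): power accounting now samples one busy-cycle delta per call via the gpudev->busy_cycles() hook instead of reprogramming the perf counter inline. Converting the delta to time — a simplified, overflow-safe stand-in for adreno_ticks_to_us(); the helper name is hypothetical:

    /* 24,500,000 busy cycles at 245 MHz => 100,000 us of busy time */
    static inline s64 ticks_to_us(u32 ticks, u32 gpu_freq_hz)
    {
    	return div_u64((u64)ticks * 1000000, gpu_freq_hz);
    }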
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
old mode 100644
new mode 100755
index 51ee31a5..2a57203b
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -24,13 +24,16 @@
 	KGSL_CONTAINER_OF(device, struct adreno_device, dev)
 
 /* Flags to control command packet settings */
+#define KGSL_CMD_FLAGS_NONE		0x00000000
 #define KGSL_CMD_FLAGS_PMODE		0x00000001
 #define KGSL_CMD_FLAGS_NO_TS_CMP	0x00000002
 #define KGSL_CMD_FLAGS_NOT_KERNEL_CMD	0x00000004
 
 /* Command identifiers */
-#define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0xDEADBEEF
-#define KGSL_CMD_IDENTIFIER		0xFEEDFACE
+#define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
+#define KGSL_CMD_IDENTIFIER		0x2EEDFACE
+#define KGSL_START_OF_IB_IDENTIFIER	0x2EADEABE
+#define KGSL_END_OF_IB_IDENTIFIER	0x2ABEDEAD
 
 #ifdef CONFIG_MSM_SCM
 #define ADRENO_DEFAULT_PWRSCALE_POLICY  (&kgsl_pwrscale_policy_tz)
@@ -38,12 +41,17 @@
 #define ADRENO_DEFAULT_PWRSCALE_POLICY  NULL
 #endif
 
+#define ADRENO_ISTORE_START 0x5000 /* Istore offset */
+
 enum adreno_gpurev {
 	ADRENO_REV_UNKNOWN = 0,
 	ADRENO_REV_A200 = 200,
+	ADRENO_REV_A203 = 203,
 	ADRENO_REV_A205 = 205,
 	ADRENO_REV_A220 = 220,
 	ADRENO_REV_A225 = 225,
+	ADRENO_REV_A305 = 305,
+	ADRENO_REV_A320 = 320,
 };
 
 struct adreno_gpudev;
@@ -64,20 +72,34 @@ struct adreno_device {
 	unsigned int mharb;
 	struct adreno_gpudev *gpudev;
 	unsigned int wait_timeout;
+	unsigned int istore_size;
+	unsigned int pix_shader_start;
+	unsigned int instruction_size;
+	unsigned int ib_check_level;
 };
 
 struct adreno_gpudev {
-	int (*ctxt_gpustate_shadow)(struct adreno_device *,
-		struct adreno_context *);
-	int (*ctxt_gmem_shadow)(struct adreno_device *,
-		struct adreno_context *);
+	/*
+	 * These registers are in a different location on A3XX, so define
+	 * them in the structure and use them as variables.
+	 */
+	unsigned int reg_rbbm_status;
+	unsigned int reg_cp_pfp_ucode_data;
+	unsigned int reg_cp_pfp_ucode_addr;
+
+	/* GPU specific function hooks */
+	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
 	void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
 	void (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
 	irqreturn_t (*irq_handler)(struct adreno_device *);
 	void (*irq_control)(struct adreno_device *, int);
+	void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
+	void (*start)(struct adreno_device *);
+	unsigned int (*busy_cycles)(struct adreno_device *);
 };
 
 extern struct adreno_gpudev adreno_a2xx_gpudev;
+extern struct adreno_gpudev adreno_a3xx_gpudev;
 
 int adreno_idle(struct kgsl_device *device, unsigned int timeout);
 void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
@@ -85,23 +107,32 @@ void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
 void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
 				unsigned int value);
 
-uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device,
-	unsigned int pt_base, unsigned int gpuaddr, unsigned int *size);
+struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
+						unsigned int pt_base,
+						unsigned int gpuaddr,
+						unsigned int size);
+
+uint8_t *adreno_convertaddr(struct kgsl_device *device,
+	unsigned int pt_base, unsigned int gpuaddr, unsigned int size);
 
 static inline int adreno_is_a200(struct adreno_device *adreno_dev)
 {
 	return (adreno_dev->gpurev == ADRENO_REV_A200);
 }
 
+static inline int adreno_is_a203(struct adreno_device *adreno_dev)
+{
+	return (adreno_dev->gpurev == ADRENO_REV_A203);
+}
+
 static inline int adreno_is_a205(struct adreno_device *adreno_dev)
 {
-	return (adreno_dev->gpurev == ADRENO_REV_A200);
+	return (adreno_dev->gpurev == ADRENO_REV_A205);
}
 
 static inline int adreno_is_a20x(struct adreno_device *adreno_dev)
 {
-	return (adreno_dev->gpurev == ADRENO_REV_A200 ||
-		adreno_dev->gpurev == ADRENO_REV_A205);
+	return (adreno_dev->gpurev <= 209);
 }
 
 static inline int adreno_is_a220(struct adreno_device *adreno_dev)
@@ -122,8 +153,36 @@ static inline int adreno_is_a22x(struct adreno_device *adreno_dev)
 
 static inline int adreno_is_a2xx(struct adreno_device *adreno_dev)
 {
-	return (adreno_dev->gpurev <= ADRENO_REV_A225);
+	return (adreno_dev->gpurev <= 299);
 }
 
+static inline int adreno_is_a3xx(struct adreno_device *adreno_dev)
+{
+	return (adreno_dev->gpurev >= 300);
+}
+
+/**
+ * adreno_encode_istore_size - encode istore size in CP format
+ * @adreno_dev - The 3D device.
+ *
+ * Encode the istore size into the format expected by the
+ * CP_SET_SHADER_BASES and CP_ME_INIT commands:
+ * bits 31:29 - istore size as encoded by this function
+ * bits 27:16 - vertex shader start offset in instructions
+ * bits 11:0 - pixel shader start offset in instructions.
+ */
+static inline int adreno_encode_istore_size(struct adreno_device *adreno_dev)
+{
+	unsigned int size;
+	/* in a225 the CP microcode multiplies the encoded
+	 * value by 3 while decoding.
+	 */
+	if (adreno_is_a225(adreno_dev))
+		size = adreno_dev->istore_size / 3;
+	else
+		size = adreno_dev->istore_size;
+
+	return (ilog2(size) - 5) << 29;
+}
 #endif /*__ADRENO_H */
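Worked example of the encoding above (not in the patch): for istore_size = 512, ilog2(512) - 5 = 4, so the size encodes as 4 << 29 = 0x80000000. OR in pix_shader_start = 384 (0x180) from the gpulist table and you get exactly the (0x80000000) | 0x180 value the old a2xx code hard-coded. For A225, 1536 / 3 = 512 encodes the same way.

    unsigned int encoded = (ilog2(512) - 5) << 29;	/* 0x80000000 */
    unsigned int word    = encoded | 384;		/* == (0x80000000) | 0x180 */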
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
old mode 100644
new mode 100755
index 064b05e9..cc611779
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -11,6 +11,8 @@
 *
 */
 
+#include <linux/delay.h>
+
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_cffdump.h"
@@ -72,10 +74,6 @@
 #define TEX_CONSTANTS		(32*6)	/* DWORDS */
 #define BOOL_CONSTANTS		8	/* DWORDS */
 #define LOOP_CONSTANTS		56	/* DWORDS */
-#define SHADER_INSTRUCT_LOG2	9U	/* 2^n == SHADER_INSTRUCTIONS */
-
-/* 96-bit instructions */
-#define SHADER_INSTRUCT		(1<<SHADER_INSTRUCT_LOG2)
 
+static inline int _shader_shadow_size(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->istore_size *
+		(adreno_dev->instruction_size * sizeof(unsigned int));
+}
+
+static inline int _context_size(struct adreno_device *adreno_dev)
+{
+	return SHADER_OFFSET + 3 * _shader_shadow_size(adreno_dev);
+}
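For reference (not part of the patch): the fixed CONTEXT_SIZE constant gives way to per-core math driven by the gpulist table. With the A2xx values (istore_size = 512, instruction_size = 3 dwords), one shader shadow is 512 * 3 * 4 = 6144 bytes, and the context allocation is SHADER_OFFSET plus three shadows (vertex, pixel, shared):

    unsigned int shadow   = 512 * 3 * sizeof(unsigned int); /* 6144 bytes */
    unsigned int ctx_size = SHADER_OFFSET + 3 * shadow;	    /* _context_size() */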
 /* A scratchpad used to build commands during context create */
 
@@ -546,6 +552,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
 	unsigned int addr = shadow->gmemshadow.gpuaddr;
 	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;
 
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
 	/* Store TP0_CHICKEN register */
 	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 	*cmds++ = REG_TP0_CHICKEN;
@@ -554,6 +561,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
 
 	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
 	*cmds++ = 0;
+	}
 
 	/* Set TP0_CHICKEN to zero */
 	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
@@ -601,7 +609,8 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
 	*cmds++ = 0x00003F00;
 
 	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
-	*cmds++ = (0x80000000) | 0x180;
+	*cmds++ = adreno_encode_istore_size(adreno_dev)
+		| adreno_dev->pix_shader_start;
 
 	/* load the patched vertex shader stream */
 	cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);
@@ -755,6 +764,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
 	unsigned int *cmds = shadow->gmem_restore_commands;
 	unsigned int *start = cmds;
 
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
 	/* Store TP0_CHICKEN register */
 	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 	*cmds++ = REG_TP0_CHICKEN;
@@ -762,6 +772,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
 
 	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
 	*cmds++ = 0;
+	}
 
 	/* Set TP0_CHICKEN to zero */
 	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
@@ -802,7 +813,8 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
 	*cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */
 
 	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
-	*cmds++ = (0x80000000) | 0x180;
+	*cmds++ = adreno_encode_istore_size(adreno_dev)
+		| adreno_dev->pix_shader_start;
 
 	/* Load the patched fragment shader stream */
 	cmds =
@@ -1089,7 +1101,8 @@ static void build_regrestore_cmds(struct adreno_device *adreno_dev,
 }
 
 static void
-build_shader_save_restore_cmds(struct adreno_context *drawctxt)
+build_shader_save_restore_cmds(struct adreno_device *adreno_dev,
+				struct adreno_context *drawctxt)
 {
 	unsigned int *cmd = tmp_ctx.cmd;
 	unsigned int *save, *restore, *fixup;
@@ -1099,8 +1112,10 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt)
 
 	/* compute vertex, pixel and shared instruction shadow GPU addresses */
 	tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
-	tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE;
-	tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE;
+	tmp_ctx.shader_pixel = tmp_ctx.shader_vertex
+				+ _shader_shadow_size(adreno_dev);
+	tmp_ctx.shader_shared = tmp_ctx.shader_pixel
+				+ _shader_shadow_size(adreno_dev);
 
 	/* restore shader partitioning and instructions */
 
@@ -1156,8 +1171,8 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt)
 	*cmd++ = REG_SCRATCH_REG2;
 	/* AND off invalid bits. */
 	*cmd++ = 0x0FFF0FFF;
-	/* OR in instruction memory size */
-	*cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29);
+	/* OR in instruction memory size. */
+	*cmd++ = adreno_encode_istore_size(adreno_dev);
 
 	/* write the computed value to the SET_SHADER_BASES data field */
 	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
@@ -1219,45 +1234,22 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt)
 }
 
 /* create buffers for saving/restoring registers, constants, & GMEM */
-static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev,
+static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
-	int result;
-
-	/* Allocate vmalloc memory to store the gpustate */
-	result = kgsl_allocate(&drawctxt->gpustate,
-		drawctxt->pagetable, CONTEXT_SIZE);
-
-	if (result)
-		return result;
-
 	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
 
-	/* Blank out h/w register, constant, and command buffer shadows. */
-	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
-
-	/* set-up command and vertex buffer pointers */
-	tmp_ctx.cmd = tmp_ctx.start
-		= (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
-
 	/* build indirect command buffers to save & restore regs/constants */
-	adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT);
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_regsave_cmds(adreno_dev, drawctxt);
 
-	build_shader_save_restore_cmds(drawctxt);
+	build_shader_save_restore_cmds(adreno_dev, drawctxt);
 
-	kgsl_cache_range_op(&drawctxt->gpustate,
-			KGSL_CACHE_OP_FLUSH);
-
-	kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate,
-			drawctxt->gpustate.gpuaddr,
-			drawctxt->gpustate.size, false);
-
 	return 0;
 }
 
 /* create buffers for saving/restoring registers, constants, & GMEM */
-static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
+static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
 	int result;
@@ -1272,8 +1264,8 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
 	if (result)
 		return result;
 
-	/* we've allocated the shadow, when swapped out, GMEM must be saved. */
-	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE;
+	/* set the gmem shadow flag for the context */
+	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
 
 	/* blank out gmem shadow. */
 	kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0,
@@ -1284,6 +1276,7 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
 		&tmp_ctx.cmd);
 
 	/* build TP0_CHICKEN register restore command buffer */
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE))
 	tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
 
 	/* build indirect command buffers to save & restore gmem */
@@ -1309,7 +1302,61 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
 	return 0;
 }
-static void a2xx_ctxt_save(struct adreno_device *adreno_dev,
+static int a2xx_drawctxt_create(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	int ret;
+
+	/*
+	 * Allocate memory for the GPU state and the context commands.
+	 * Despite the name, this is much more than just storage for
+	 * the gpustate.  This contains command space for gmem save
+	 * and texture and vertex buffer storage too.
+	 */
+
+	ret = kgsl_allocate(&drawctxt->gpustate,
+		drawctxt->pagetable, _context_size(adreno_dev));
+
+	if (ret)
+		return ret;
+
+	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0,
+		_context_size(adreno_dev));
+
+	tmp_ctx.cmd = tmp_ctx.start
+		= (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
+
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+		ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt);
+		if (ret)
+			goto done;
+
+		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
+	}
+
+	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+		ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt);
+		if (ret)
+			goto done;
+	}
+
+	/* Flush and sync the gpustate memory */
+
+	kgsl_cache_range_op(&drawctxt->gpustate,
+			KGSL_CACHE_OP_FLUSH);
+
+	kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate,
+			drawctxt->gpustate.gpuaddr,
+			drawctxt->gpustate.size, false);
+
+done:
+	if (ret)
+		kgsl_sharedmem_free(&drawctxt->gpustate);
+
+	return ret;
+}
+
+static void a2xx_drawctxt_save(struct adreno_device *adreno_dev,
 	struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
*/ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->shader_fixup, 3); context->flags |= CTXT_FLAGS_SHADER_RESTORE; } + } if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { @@ -1350,14 +1400,16 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, context->context_gmem_shadow.gmem_save, 3); /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags |= CTXT_FLAGS_GMEM_RESTORE; } } -static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, +static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1377,7 +1429,7 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, cmds[3] = device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); cmds[4] = (unsigned int) context; - adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); #ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP @@ -1393,27 +1445,34 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); /* restore shader instructions & partitioning. */ if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); } + } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; - adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + cmds, 2); } } @@ -1532,21 +1591,33 @@ static void a2xx_rbbm_intrcallback(struct kgsl_device *device) { unsigned int status = 0; unsigned int rderr = 0; + unsigned int addr = 0; + const char *source; adreno_regread(device, REG_RBBM_INT_STATUS, &status); if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { - union rbbm_read_error_u rerr; adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); - rerr.val = rderr; - if (rerr.f.read_address == REG_CP_INT_STATUS && - rerr.f.read_error && - rerr.f.read_requester) + source = (rderr & RBBM_READ_ERROR_REQUESTER) + ? "host" : "cp"; + /* convert to dword address */ + addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2; + + /* + * Log CP_INT_STATUS interrupts from the CP at a + * lower level because they can happen frequently + * and are worked around in a2xx_irq_handler. 
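+		 * The READ_ERROR address field is a byte address,
+		 * hence the >> 2 above before it is compared
+		 * against the dword register offset
+		 * REG_CP_INT_STATUS.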
+ */ + if (addr == REG_CP_INT_STATUS && + rderr & RBBM_READ_ERROR_ERROR && + rderr & RBBM_READ_ERROR_REQUESTER) KGSL_DRV_WARN(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); else KGSL_DRV_CRIT(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); } status &= RBBM_INT_MASK; @@ -1597,11 +1668,202 @@ static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) wmb(); } +static void a2xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + + /* ME_INIT */ + cmds = adreno_ringbuffer_allocspace(rb, 19); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18)); + /* All fields present (bits 9:0) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); + /* Disable/Enable Real-Time Stream processing (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); + + /* Instruction memory size: */ + GSL_RB_WRITE(cmds, cmds_gpu, + (adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start)); + /* Maximum Contexts */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + /* Write Confirm Interval and The CP will wait the + * wait_interval * 16 clocks between polling */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + /* NQ and External Memory Swap */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode error checking */ + GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); + /* Disable header dumping and Header dump address */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Header dump size */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_FREEZE); + + /* Get the value */ + adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &val); + + /* Reset the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_RESET); + + /* Re-Enable the performance monitors */ + adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); + adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); + + return val; +} + +static void a2xx_gmeminit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + union reg_rb_edram_info rb_edram_info; + unsigned int gmem_size; + unsigned int edram_value = 0; + + /* make sure edram 
range is aligned to size */ + BUG_ON(adreno_dev->gmemspace.gpu_base & + (adreno_dev->gmemspace.sizebytes - 1)); + + /* get edram_size value equivalent */ + gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); + while (gmem_size >>= 1) + edram_value++; + + rb_edram_info.val = 0; + + rb_edram_info.f.edram_size = edram_value; + rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ + + /* must be aligned to size */ + rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); + + adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); +} + +static void a2xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int init_reftimestamp = 0x7fffffff; + + /* + * We need to make sure all blocks are powered up and clocked + * before issuing a soft reset. The overrides will then be + * turned off (set to 0) + */ + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); + + /* + * Only reset CP block if all blocks have previously been + * reset + */ + if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || + !adreno_is_a22x(adreno_dev)) { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0xFFFFFFFF); + device->flags |= KGSL_FLAGS_SOFT_RESET; + } else { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0x00000001); + } + /* + * The core is in an indeterminate state until the reset + * completes after 30ms. + */ + msleep(30); + + adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); + + if (adreno_is_a225(adreno_dev)) { + /* Enable large instruction store for A225 */ + adreno_regwrite(device, REG_SQ_FLOW_CONTROL, + 0x18000000); + } + + adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); + + adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); + adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); + + /* this board is neither 8960 nor 8930 (and the cpu_is_* helpers are unavailable on this tree), so force the else path */ +// if (cpu_is_msm8960() || cpu_is_msm8930()) + if (0) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); + + if (!adreno_is_a22x(adreno_dev)) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); + + kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); + + kgsl_sharedmem_writel(&device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), + init_reftimestamp); + + adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); + + /* Make sure interrupts are disabled */ + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, REG_SQ_INT_CNTL, 0); + + a2xx_gmeminit(adreno_dev); +} + +/* Defined in adreno_a2xx_snapshot.c */ +void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, + int *remain, int hang); + struct adreno_gpudev adreno_a2xx_gpudev = { - .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, - .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, - .ctxt_save = a2xx_ctxt_save, - .ctxt_restore = a2xx_ctxt_restore, + .reg_rbbm_status = REG_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = REG_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = REG_CP_PFP_UCODE_DATA, + + .ctxt_create = a2xx_drawctxt_create, + .ctxt_save = a2xx_drawctxt_save, + .ctxt_restore = a2xx_drawctxt_restore, .irq_handler = a2xx_irq_handler, .irq_control = a2xx_irq_control, + .rb_init = a2xx_rb_init, + .busy_cycles = a2xx_busy_cycles, + .start = a2xx_start, }; diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100755 index 00000000..cbc7bed4 --- /dev/null +++ 
b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,2547 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" + +/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem + * functions. + */ + +#define _SET(_shift, _val) ((_val) << (_shift)) + +/* + **************************************************************************** + * + * Context state shadow structure: + * + * +---------------------+------------+-------------+---------------------+---+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex| + * +---------------------+------------+-------------+---------------------+---+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 5K - Command and Vertex Buffers + * 8K - Shader Instruction Shadow + * ~6K - Texture Constant Shadow + * + * + *************************************************************************** + */ + +/* Sizes of all sections in state shadow memory */ +#define ALU_SHADOW_SIZE (8*1024) /* 8KB */ +#define REG_SHADOW_SIZE (4*1024) /* 4KB */ +#define CMD_BUFFER_SIZE (5*1024) /* 5KB */ +#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */ +#define TEX_SIZE_MIPMAP 1936 /* bytes */ +#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */ +#define TEX_SHADOW_SIZE \ + ((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \ + TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */ +#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */ + +/* Total context size, excluding GMEM shadow */ +#define CONTEXT_SIZE \ + (ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \ + CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \ + TEX_SHADOW_SIZE) + +/* Offsets to different sections in context shadow memory */ +#define REG_OFFSET ALU_SHADOW_SIZE +#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE) +#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE) +#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE) +#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET +#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) +#define FS_TEX_OFFSET_MEM_OBJECTS \ + (VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ) +#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) + +/* The offset for fragment shader data in HLSQ context */ +#define SSIZE (16*1024) + +#define HLSQ_SAMPLER_OFFSET 0x000 +#define HLSQ_MEMOBJ_OFFSET 0x400 +#define HLSQ_MIPMAP_OFFSET 0x800 + +#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM +/* Use shadow RAM */ +#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2) +#else +/* Use working RAM */ +#define HLSQ_SHADOW_BASE 0x10000 +#endif + +#define REG_TO_MEM_LOOP_COUNT_SHIFT 15 + +#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \ + vis_cull_mode) \ + (((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \ + ((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \ + ((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \ + 
((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \ + ((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \ + (1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE)) + +/* + * List of context registers (starting from dword offset 0x2000). + * Each line contains start and end of a range of registers. + */ +static const unsigned int context_register_ranges[] = { + A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL, + A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ, + A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE, + A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE, + A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET, + A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL, + A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL, + A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR, + A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR, + A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3, + A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO, + A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL, + A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL, + A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, + A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX, + A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, + A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG, + A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG, + A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, + A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG, + A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD, + A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG, + A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7, + A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG, + A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG, + A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, + A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG, + A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3, + A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG, + A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1, +}; + +/* Global registers that need to be saved separately */ +static const unsigned int global_registers[] = { + A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0, + A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0, + A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1, + A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1, + A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2, + A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2, + A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3, + A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3, + A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4, + A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4, + A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5, + A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5, + A3XX_VSC_BIN_SIZE, + A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1, + A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3, + A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5, + A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7, + A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_4, 
A3XX_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_SIZE_ADDRESS +}; + +#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers) + +/* A scratchpad used to build commands during context create */ +static struct tmp_ctx { + unsigned int *cmd; /* Next available dword in C&V buffer */ + + /* Addresses in command buffer where registers are saved */ + uint32_t reg_values[GLOBAL_REGISTER_COUNT]; + uint32_t gmem_base; /* Base GPU address of GMEM */ +} tmp_ctx; + +#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC +/* + * Function for executing dest = ( (reg & and) ROL rol ) | or + */ +static unsigned int *rmw_regtomem(unsigned int *cmd, + unsigned int reg, unsigned int and, + unsigned int rol, unsigned int or, + unsigned int dest) +{ + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = reg; /* OR address */ + + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2; + *cmd++ = and; /* AND value */ + *cmd++ = or; /* OR value */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = dest; + + return cmd; +} +#endif + +static void build_regconstantsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int i; + + drawctxt->constant_save_commands[0].hostptr = cmd; + drawctxt->constant_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + cmd++; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* + * Context registers are already shadowed; just need to + * disable shadowing to prevent corruption. + */ + + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + +#else + /* + * Make sure the HW context has the correct register values before + * reading them. + */ + + /* Write context registers into shadow */ + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + unsigned int start = context_register_ranges[i * 2]; + unsigned int end = context_register_ranges[i * 2 + 1]; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + start; + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) + & 0xFFFFE000) + (start - 0x2000) * 4; + } +#endif + + /* Need to handle some of the global registers separately */ + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = global_registers[i]; + *cmd++ = tmp_ctx.reg_values[i]; + } + + /* Save vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[1].hostptr = cmd; + drawctxt->constant_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000) / 4 + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
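+	   (Each 128-bit unit is 4 dwords; the save fixup uses a
+	   ROL of 17 = REG_TO_MEM_LOOP_COUNT_SHIFT + 2, which both
+	   positions VSCONSTLENGTH in the REG_TO_MEM loop-count
+	   field and scales it to a dword count.)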
+ */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + /* ALU constant shadow base */ + *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc; + + /* Save fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[2].hostptr = cmd; + drawctxt->constant_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4 + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + + /* + From fixup: + + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + dst = base + offset + Because of the base alignment we can use + dst = base | offset + */ + *cmd++ = 0; /* dst */ + + /* Save VS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save VS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save VS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Save FS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save FS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save FS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* Copy GMEM contents to system memory shadow. 
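+ * The IB built here runs a resolve pass: the RB is switched
+ * to RB_RESOLVE_PASS mode with the copy destination pointed
+ * at the gmemshadow buffer, trivial passthrough shaders are
+ * loaded, and a full-screen quad is drawn so that every GMEM
+ * pixel is written out to system memory.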
*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) | + _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL); + /* RB_COPY_CONTROL */ + *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE, + RB_CLEAR_MODE_RESOLVE) | + _SET(RB_COPYCONTROL_COPY_GMEM_BASE, + tmp_ctx.gmem_base >> 14); + /* RB_COPY_DEST_BASE */ + *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE, + shadow->gmemshadow.gpuaddr >> 5); + /* RB_COPY_DEST_PITCH */ + *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH, + (shadow->pitch * 4) / 32); + /* RB_COPY_DEST_INFO */ + *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE, + RB_TILINGMODE_LINEAR) | + _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) | + _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices.gpuaddr); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_RESERVED2, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) | + _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) | + _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL1REG_RESERVED1, 4); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 
31); + /* HLSQ_CONTROL_3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REQ */ + *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) | + _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) | + _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252); + /* SP_VS_OUT_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 1); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) | + _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG_1 */ + *cmds++ = 
0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUTPUT_REG */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG_0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 1); + /* SP_FS_MRT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_3 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */ + *cmds++ = 0x00000005; *cmds++ = 0x30044b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30244b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLEND_CONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + 
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLEND_CONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLEND_CONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLEND_CONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INSTANCEID_OFFSET */ + *cmds++ = 0x00000000; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CNTL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* 
GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ); + /* GRAS_CL_GB_CLIP_ADJ */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Resolve using two draw calls with a dummy register + * write in between. This is a HLM workaround + * that should be removed later. 
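+	 * The two trilist draws below cover the two triangles of
+	 * the screen quad (indices 0,1,2 and 2,1,3); the
+	 * A3XX_HLSQ_CL_CONTROL_0_REG writes after each draw are
+	 * the dummy register writes referred to above.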
+ */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000000; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000002; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000002; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000003; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +static void build_shader_save_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start; + + /* Reserve space for boolean values used for COND_EXEC packet */ + drawctxt->cond_execs[0].hostptr = cmd; + drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[1].hostptr = cmd; + drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + drawctxt->shader_save_commands[0].hostptr = cmd; + drawctxt->shader_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->shader_save_commands[1].hostptr = cmd; + drawctxt->shader_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + start = cmd; + + /* Save vertex shader */ + + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[2].hostptr = cmd; + drawctxt->shader_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8 + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + src = (HLSQ_SHADOW_BASE + 0x1000)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Save fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[3].hostptr = cmd; + drawctxt->shader_save_commands[3].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8 + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. 
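+	 (Each 256-bit unit is 8 dwords; the fixup's ROL of
+	 11 + REG_TO_MEM_LOOP_COUNT_SHIFT moves FS_LENGTH out of
+	 bits [30:24] and into the REG_TO_MEM loop-count field
+	 with an extra shift of 3, so the count works out to
+	 FS_LENGTH * 8.)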
+ + fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32 + From regspec: + + SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]: + First instruction of the whole shader will be stored from + the offset in instruction cache, unit = 256bits, a cache line. + It can start from 0 if no VS available. + + src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* + * Make an IB to modify context save IBs with the correct shader instruction + * and constant sizes and offsets. + */ + +static void build_save_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Flush HLSQ lazy updates */ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + /* Make sure registers are flushed */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[2].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Save shader offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->shader_save_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[1].gpuaddr; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[2].gpuaddr; + + /* Save FS constant offset */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_save_commands[0].gpuaddr; + + + /* Save VS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[0].gpuaddr; + + /* Save FS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[1].gpuaddr; +#else + + /* Shader save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 11+REG_TO_MEM_LOOP_COUNT_SHIFT, + (HLSQ_SHADOW_BASE + 0x1000) / 4, + drawctxt->shader_save_commands[2].gpuaddr); + + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */ + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 ) + | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) | + 
A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x7f000000; /* AND value */ + *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */ + + /* + * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) | + * SP_FS_OBJ_OFFSET_REG + */ + + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */ + /* + * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) | + * 0x00000000 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = A3XX_CP_SCRATCH_REG3; + *cmd++ = 0xfe000000; /* AND value */ + *cmd++ = 0x00000000; /* OR value */ + /* + * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0xffffffff; /* AND value */ + *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Constant save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000) / 4, + drawctxt->constant_save_commands[1].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4, + drawctxt->constant_save_commands[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, drawctxt->gpustate.gpuaddr & 0xfffffe00, + drawctxt->constant_save_commands[2].gpuaddr + + sizeof(unsigned int)); + + /* Modify constant save conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save VS instruction store mode */ + + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[0].gpuaddr); + + /* Save FS instruction store mode */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[1].gpuaddr); + +#endif + + create_ib1(drawctxt, drawctxt->save_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/****************************************************************************/ +/* Functions to build context restore IBs */ +/****************************************************************************/ + +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31); + /* HLSQ_CONTROL3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0); + /* RB_MRT_BUF_INFO0 */ + *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) | + _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH, + (shadow->gmem_pitch * 4 * 8) / 256); + /* RB_MRT_BUF_BASE0 */ + *cmds++ 
= _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5); + + /* Texture samplers */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00000240; + *cmds++ = 0x00000000; + + /* Texture memobjs */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x4cc06880; + *cmds++ = shadow->height | (shadow->width << 14); + *cmds++ = (shadow->pitch*4*8) << 9; + *cmds++ = 0x00000000; + + /* Mipmap bases */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16); + *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = shadow->gmemshadow.gpuaddr; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252) | + _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1); + /* SP_VS_OUT_REG0 */ + *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3); + /* SP_VS_OUT_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG0 */ + *cmds++ 
= _SET(SP_VSVPCDSTREG_OUTLOC0, 8); + /* SP_VS_VPC_DST_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) | + _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 2); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG1 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUT_REG */ + *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 4); + /* SP_FS_MRT_REG1 */ + *cmds++ = 0; + /* SP_FS_MRT_REG2 */ + *cmds++ = 0; + /* SP_FS_MRT_REG3 */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) | + _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) | + _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2); + /* VPC_VARYING_INTERP_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE3 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_0 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARYING_PS_REPL_MODE_1 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | +
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARYING_PS_REPL_MODE_2 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARYING_PS_REPL_MODE_3 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + /* Load vertex shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)end; */ + *cmds++ = 0x00000000; *cmds++ = 0x13000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + /* Load fragment shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */ + *cmds++ = 0x00002000; *cmds++ = 0x57368902; + /* (rpt5)nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000500; + /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */ + *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00; + /* (sy)mov.f32f32 r1.x, r0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30044004; + /* mov.f32f32 r1.y, r0.y; */ + *cmds++ = 0x00000001; *cmds++ = 0x20044005; + /*
mov.f32f32 r1.z, r0.z; */ + *cmds++ = 0x00000002; *cmds++ = 0x20044006; + /* mov.f32f32 r1.w, r0.w; */ + *cmds++ = 0x00000003; *cmds++ = 0x20044007; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) | + _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr); + /* VFD_FETCH_INSTR_0_1 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_1 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr + 16); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1); + /* VFD_DECODE_INSTR_1 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL); + /* RB_STENCIL_CONTROL */ + *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + 
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLENDCONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLENDCONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLENDCONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLENDCONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = 
_SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL); + /* GRAS_SU_MODE_CONTROL */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0); + /* SP_FS_IMAGE_OUTPUT_REG_0 */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CONTROL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST, + PC_DI_SRC_SEL_AUTO_INDEX, + PC_DI_INDEX_SIZE_16_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000002; /* Num indices */ + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + unsigned int *lcc_start; + + int i; + + /* Flush HLSQ lazy updates 
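(Editorial aside, not part of the original patch: a worked example of the PM4 header encoding used throughout this file. Per the cp_type3_packet() macro in adreno_pm4types.h, the header dword is CP_TYPE3_PKT | ((count - 1) << 16) | (opcode << 8). With CP_EVENT_WRITE = 0x46 and a one-dword payload, the CP_EVENT_WRITE header emitted just below encodes as 0xC0000000 | (0 << 16) | (0x46 << 8) = 0xC0004600, followed by the payload dword 0x7 that selects HLSQ_FLUSH.)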
*/ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + lcc_start = cmd; + + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + cmd = reg_range(cmd, context_register_ranges[i * 2], + context_register_ranges[i * 2 + 1]); + } + + lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, + (cmd - lcc_start) - 1); + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + lcc_start[2] |= (1 << 24) | (4 << 16); +#endif + + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type0_packet(global_registers[i], 1); + tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + } + + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_constantrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int mode = 4; /* Indirect mode */ + unsigned int stateblock; + unsigned int numunits; + unsigned int statetype; + + drawctxt->cond_execs[2].hostptr = cmd; + drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[3].hostptr = cmd; + drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; + *cmd++ = 0x0; +#endif + /* HLSQ full update */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */ + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Re-enable shadowing */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = (4 << 16) | (1 << 24); + *cmd++ = 0x0; +#endif + + /* Load vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[0].hostptr = cmd; + drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd, + &drawctxt->gpustate); + + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex constants) + numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
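(Editorial worked example, not part of the original patch: if SP_VS_CTRL_REG1.VSCONSTLENGTH reads back as 2, i.e. two 128-bit units, the fixup stores numunits = 4; with stateblock = 4 and mode = 4 the formula below then evaluates to ord1 = (4 << 22) | (4 << 19) | (4 << 16) = 0x01240000.)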
+ + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + *cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1; + + /* Load fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[1].hostptr = cmd; + drawctxt->constant_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment constants) + numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + drawctxt->constant_load_commands[2].hostptr = cmd; + drawctxt->constant_load_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + ord2 = base + offset | 1 + Because of the base alignment we can use + ord2 = base | offset | 1 + */ + *cmd++ = 0; /* ord2 */ + + /* Restore VS texture memory objects */ + stateblock = 0; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore VS texture mipmap addresses */ + stateblock = 1; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore VS texture sampler objects */ + stateblock = 0; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + /* Restore FS texture memory objects */ + stateblock = 2; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore FS texture mipmap addresses */ + stateblock = 3; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore FS texture sampler objects */ + stateblock = 2; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + create_ib1(drawctxt, drawctxt->constant_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void 
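/* Editorial note on the texture-restore CP_LOAD_STATE packets above, not part of the original patch: the second dword is ((base + offset) & 0xfffffffc) | statetype. Because the shadow GPU addresses are at least 4-byte aligned, their two low bits are free, so the state type rides in the address dword; e.g. a mipmap shadow at GPU address 0x10002000 restored with statetype 1 is emitted as 0x10002001. */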
build_shader_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Vertex shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[0].hostptr = cmd; + drawctxt->shader_load_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex shader) + numunits = SP_VS_CTRL_REG0.VS_LENGTH + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16) + */ + *cmd++ = 0; /* ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[1].hostptr = cmd; + drawctxt->shader_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment shader) + numunits = SP_FS_CTRL_REG0.FS_LENGTH + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16) + */ + *cmd++ = 0; /* ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + create_ib1(drawctxt, drawctxt->shader_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd; + drawctxt->hlsqcontrol_restore_commands[0].gpuaddr + = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* IB that modifies the shader and constant sizes and offsets in restore IBs.
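 * The sizes and offsets are not known when the restore IBs are built, so they
 * are patched on the GPU itself at save time. A hedged editorial sketch of
 * the mechanism, not part of the original patch (assumption: rmw_regtomem()
 * is the helper defined earlier in this file; the packet sequence below is
 * inferred from its call sites here and may differ in detail):
 *
 *     cmd = rmw_regtomem(cmd, reg, and_mask, rol, or_val, dest);
 *         CP_REG_RMW:    scratch = live value of reg
 *         CP_REG_RMW:    scratch = rotate_left(scratch & and_mask, rol) | or_val
 *         CP_REG_TO_MEM: dword at dest = scratch
 *
 * So, for example, the first call below rotates SP_VS_CTRL_REG0.VS_LENGTH
 * down into the numunits field and ORs in the stateblock/mode bits before
 * storing the finished ord1 over the placeholder dword that was left in
 * build_shader_restore_cmds().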
*/ +static void build_restore_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[1].gpuaddr; + + /* Save constant offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_load_commands[2].gpuaddr; +#else + /* Save shader sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 30, (4 << 19) | (4 << 16), + drawctxt->shader_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000, + 30, (6 << 19) | (4 << 16), + drawctxt->shader_load_commands[1].gpuaddr); + + /* Save constant sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 23, (4 << 19) | (4 << 16), + drawctxt->constant_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 23, (6 << 19) | (4 << 16), + drawctxt->constant_load_commands[1].gpuaddr); + + /* Modify constant restore conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save fragment constant shadow offset */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1, + drawctxt->constant_load_commands[2].gpuaddr); +#endif + + /* Use mask value to avoid flushing HLSQ which would cause the HW to + discard all the shader data */ + + cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff, + 0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr); + + create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + build_regrestore_cmds(adreno_dev, drawctxt); + build_constantrestore_cmds(adreno_dev, drawctxt); + build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt); + build_regconstantsave_cmds(adreno_dev, drawctxt); + build_shader_save_cmds(adreno_dev, drawctxt); + build_shader_restore_cmds(adreno_dev, drawctxt); + build_restore_fixup_cmds(adreno_dev, drawctxt); + build_save_fixup_cmds(adreno_dev, drawctxt); + + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) { + int result = + kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, + drawctxt->context_gmem_shadow.size); + + if (result) + return result; + } else { + 
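/* Editorial comment, not in the original patch: no GMEM shadow was requested for this context, so zero the shadow descriptor and return early; the save path only issues gmem_save when CTXT_FLAGS_GMEM_SHADOW is set. */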
memset(&drawctxt->context_gmem_shadow.gmemshadow, 0, + sizeof(drawctxt->context_gmem_shadow.gmemshadow)); + + return 0; + } + + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* Do we need to idle? */ + /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */ + + tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + return 0; +} + +static int a3xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more than just storage for + * the gpustate. This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET; + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) + ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* Fixup self modifying IBs for save operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->save_fixup, 3); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->regconstant_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* Save shader instructions */ + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* + * Save GMEM (note: changes shader. shader must + * already be saved.) + */ + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.
+ gmem_save, 3); + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default pagetable and that's it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); + cmds[4] = (unsigned int)context; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + + /* + * Restore GMEM. (note: changes shader. + * Shader must not already be restored.) + */ + + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. + gmem_restore, 3); + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_restore, 3); + + /* Fixup self modifying IBs for restore operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->restore_fixup, 3); + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->constant_restore, 3); + + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->shader_restore, 3); + + /* Restore HLSQ_CONTROL_0 register */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->hlsqcontrol_restore, 3); + } +} + +static void a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + cmds = adreno_ringbuffer_allocspace(rb, 18); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17)); + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150); + GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode control - turned off for A3XX */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + const char *err = ""; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + unsigned int reg; + + adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ?
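/* Editorial worked example, not in the original patch: a raw status of 0x10000050 has bit 28 set and (0x50 & 0xFFFFF) >> 2 = 0x14, so it would be logged as a WRITE error at register word address 0x14. */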
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0x3); + + /* Clear the error */ + adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + return; + } + case A3XX_INT_RBBM_REG_TIMEOUT: + err = "RBBM: AHB register timeout"; + break; + case A3XX_INT_RBBM_ME_MS_TIMEOUT: + err = "RBBM: ME master split timeout"; + break; + case A3XX_INT_RBBM_PFP_MS_TIMEOUT: + err = "RBBM: PFP master split timeout"; + break; + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + err = "RBBM: ATB bus oveflow"; + break; + case A3XX_INT_VFD_ERROR: + err = "VFD: Out of bounds access"; + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + err = "ringbuffer TO packet in IB interrupt"; + break; + case A3XX_INT_CP_OPCODE_ERROR: + err = "ringbuffer opcode error interrupt"; + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + err = "ringbuffer reserved bit error interrupt"; + break; + case A3XX_INT_CP_HW_FAULT: + err = "ringbuffer hardware fault"; + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + err = "ringbuffer protected mode error interrupt"; + break; + case A3XX_INT_CP_AHB_ERROR_HALT: + err = "ringbuffer AHB error interrupt"; + break; + case A3XX_INT_MISC_HANG_DETECT: + err = "MISC: GPU hang detected"; + break; + case A3XX_INT_UCHE_OOB_ACCESS: + err = "UCHE: Out of bounds access"; + break; + } + + KGSL_DRV_CRIT(device, "%s\n", err); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); +} + +static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) +{ + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + + if (irq == A3XX_INT_CP_RB_INT) { + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0); + wmb(); + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + wake_up_interruptible_all(&rb->device->wait_queue); + + /* Schedule work to free mem and issue ibs */ + queue_work(rb->device->work_queue, &rb->device->ts_expired_ws); + + atomic_notifier_call_chain(&rb->device->ts_notifier_list, + rb->device->id, NULL); +} + +#define A3XX_IRQ_CALLBACK(_c) { .func = _c } + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_VFD_ERROR) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_MISC_HANG_DETECT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct { + void (*func)(struct adreno_device *, int); +} a3xx_irq_funcs[] = { + A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */ + A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - 
CP_HW_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_HALT */ + A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */ + A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 24 - MISC_HANG_DETECT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + /* 26 to 31 - Unused */ +}; + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t ret = IRQ_NONE; + unsigned int status, tmp; + int i; + + adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status); + + for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) { + if (tmp & 1) { + if (a3xx_irq_funcs[i].func != NULL) { + a3xx_irq_funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else { + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + } + } + + tmp >>= 1; + } + + if (status) + adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD, + status); + return ret; +} + +static void a3xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK); + else + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0); +} + +static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg); + reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Read the value */ + adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val); + + /* Reset the counter */ + reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Re-enable the counter */ + reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1; + reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + return val; +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* Reset the core */ + adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, + 0x00000001); + msleep(20); + + /* + * enable fixed master AXI port of 0x0 for all clients to keep + * traffic from going to random places + */ + + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Enable the RBBM error reporting bits.
This lets us get + useful information on failure */ + + adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000); +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_rbbm_status = A3XX_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA, + + .ctxt_create = a3xx_drawctxt_create, + .ctxt_save = a3xx_drawctxt_save, + .ctxt_restore = a3xx_drawctxt_restore, + .rb_init = a3xx_rb_init, + .irq_control = a3xx_irq_control, + .irq_handler = a3xx_irq_handler, + .busy_cycles = a3xx_busy_cycles, + .start = a3xx_start, +}; diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c old mode 100644 new mode 100755 index c878a2c2..9c9ee02c --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -223,21 +223,24 @@ static int kgsl_regread_nolock(struct kgsl_device *device, return 0; } -#define KGSL_ISTORE_START 0x5000 -#define KGSL_ISTORE_LENGTH 0x600 +#define ADRENO_ISTORE_START 0x5000 static ssize_t kgsl_istore_read( struct file *file, char __user *buff, size_t buff_count, loff_t *ppos) { - int i, count = KGSL_ISTORE_LENGTH, remaining, pos = 0, tot = 0; + int i, count, remaining, pos = 0, tot = 0; struct kgsl_device *device = file->private_data; const int rowc = 8; + struct adreno_device *adreno_dev; if (!ppos || !device) return 0; + adreno_dev = ADRENO_DEVICE(device); + count = adreno_dev->istore_size * adreno_dev->instruction_size; + remaining = count; for (i = 0; i < count; i += rowc) { unsigned int vals[rowc]; @@ -248,7 +251,8 @@ static ssize_t kgsl_istore_read( if (pos >= *ppos) { for (j = 0; j < linec; ++j) kgsl_regread_nolock(device, - KGSL_ISTORE_START+i+j, vals+j); + ADRENO_ISTORE_START + i + j, + vals + j); } else memset(vals, 0, sizeof(vals)); @@ -440,6 +444,8 @@ void adreno_debugfs_init(struct kgsl_device *device) &kgsl_cff_dump_enable_fops); debugfs_create_u32("wait_timeout", 0644, device->d_debugfs, &adreno_dev->wait_timeout); + debugfs_create_u32("ib_check", 0644, device->d_debugfs, + &adreno_dev->ib_check_level); /* Create post mortem control files */ diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c old mode 100644 new mode 100755 index b7b0ea46..87f9efe4 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -19,6 +19,7 @@ /* quad for copying GMEM to context shadow */ #define QUAD_LEN 12 +#define QUAD_RESTORE_LEN 14 static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000, @@ -27,6 +28,14 @@ static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000 }; +static unsigned int gmem_restore_quad[QUAD_RESTORE_LEN] = { + 0x00000000, 0x3f800000, 0x3f800000, + 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x3f800000, +}; + #define TEXCOORD_LEN 8 static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = { @@ -73,12 +82,12 @@ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) gmem_copy_quad[4] = uint2float(shadow->height); gmem_copy_quad[9] = uint2float(shadow->width); - gmem_copy_quad[0] = 0; - gmem_copy_quad[6] = 0; - gmem_copy_quad[7] = 0; - gmem_copy_quad[10] = 0; + gmem_restore_quad[5] = uint2float(shadow->height); + gmem_restore_quad[7] = uint2float(shadow->width); 
memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2); + memcpy(shadow->quad_vertices_restore.hostptr, gmem_restore_quad, + QUAD_RESTORE_LEN << 2); memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, TEXCOORD_LEN << 2); @@ -103,6 +112,13 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, cmd += QUAD_LEN; + /* Used by A3XX, but define for both to make the code easier */ + shadow->quad_vertices_restore.hostptr = cmd; + shadow->quad_vertices_restore.gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + + cmd += QUAD_RESTORE_LEN; + /* tex coord buffer location (in GPU space) */ shadow->quad_texcoords.hostptr = cmd; shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); @@ -139,27 +155,19 @@ int adreno_drawctxt_create(struct kgsl_device *device, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; - /* FIXME: Deal with preambles */ + if (flags & KGSL_CONTEXT_PREAMBLE) + drawctxt->flags |= CTXT_FLAGS_PREAMBLE; - ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt); + if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC) + drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; + + ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt); if (ret) - goto err; - - /* Save the shader instruction memory on context switching */ - drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; - - if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) { - /* create gmem shadow */ - ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev, - drawctxt); - if (ret != 0) goto err; - } context->devctxt = drawctxt; return 0; err: - kgsl_sharedmem_free(&drawctxt->gpustate); kfree(drawctxt); return ret; } @@ -179,11 +187,12 @@ void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_context *drawctxt = context->devctxt; + struct adreno_context *drawctxt; - if (drawctxt == NULL) + if (context == NULL) return; + drawctxt = context->devctxt; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { /* no need to save GMEM or shader, the context is @@ -261,6 +270,6 @@ void adreno_drawctxt_switch(struct adreno_device *adreno_dev, adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); /* Set the new context */ - adreno_dev->drawctxt_active = drawctxt; adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + adreno_dev->drawctxt_active = drawctxt; } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h old mode 100644 new mode 100755 index 3c3a8536..50ee3450 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -30,12 +30,16 @@ #define CTXT_FLAGS_GMEM_SAVE 0x00000200 /* gmem can be restored from shadow */ #define CTXT_FLAGS_GMEM_RESTORE 0x00000400 +/* preamble packed in cmdbuffer for context switching */ +#define CTXT_FLAGS_PREAMBLE 0x00000800 /* shader must be copied to shadow */ #define CTXT_FLAGS_SHADER_SAVE 0x00002000 /* shader can be restored from shadow */ #define CTXT_FLAGS_SHADER_RESTORE 0x00004000 /* Context has caused a GPU hang */ #define CTXT_FLAGS_GPU_HANG 0x00008000 +/* Specifies there is no need to save GMEM */ +#define CTXT_FLAGS_NOGMEMALLOC 0x00010000 struct kgsl_device; struct adreno_device; @@ -46,37 +50,56 @@ struct kgsl_context; struct gmem_shadow_t { struct kgsl_memdesc gmemshadow; /* Shadow buffer address */ - /* 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x - * 256 rows. */ - /* width & height must be a multiples of 32, in case tiled textures - * are used. */ - enum COLORFORMATX format; + /* + * 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x + * 256 rows. Width & height must be multiples of 32 in case tiled + * textures are used + */ + + enum COLORFORMATX format; /* Unused on A3XX */ unsigned int size; /* Size of surface used to store GMEM */ unsigned int width; /* Width of surface used to store GMEM */ unsigned int height; /* Height of surface used to store GMEM */ unsigned int pitch; /* Pitch of surface used to store GMEM */ unsigned int gmem_pitch; /* Pitch value used for GMEM */ - unsigned int *gmem_save_commands; - unsigned int *gmem_restore_commands; + unsigned int *gmem_save_commands; /* Unused on A3XX */ + unsigned int *gmem_restore_commands; /* Unused on A3XX */ unsigned int gmem_save[3]; unsigned int gmem_restore[3]; struct kgsl_memdesc quad_vertices; struct kgsl_memdesc quad_texcoords; + struct kgsl_memdesc quad_vertices_restore; }; struct adreno_context { uint32_t flags; struct kgsl_pagetable *pagetable; struct kgsl_memdesc gpustate; - unsigned int reg_save[3]; unsigned int reg_restore[3]; unsigned int shader_save[3]; - unsigned int shader_fixup[3]; unsigned int shader_restore[3]; - unsigned int chicken_restore[3]; - unsigned int bin_base_offset; + /* Information of the GMEM shadow that is created in context create */ struct gmem_shadow_t context_gmem_shadow; + + /* A2XX specific items */ + unsigned int reg_save[3]; + unsigned int shader_fixup[3]; + unsigned int chicken_restore[3]; + unsigned int bin_base_offset; + + /* A3XX specific items */ + unsigned int regconstant_save[3]; + unsigned int constant_restore[3]; + unsigned int hlsqcontrol_restore[3]; + unsigned int save_fixup[3]; + unsigned int restore_fixup[3]; + struct kgsl_memdesc shader_load_commands[2]; + struct kgsl_memdesc shader_save_commands[4]; + struct kgsl_memdesc constant_save_commands[3]; + struct kgsl_memdesc constant_load_commands[3]; + struct kgsl_memdesc cond_execs[4]; + struct kgsl_memdesc hlsqcontrol_restore_commands[1]; }; int adreno_drawctxt_create(struct kgsl_device *device, diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h old mode 100644 new mode 100755 index 8aea58c9..9340f691 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -29,11 +29,6 @@ /* skip N 32-bit words to get to the next packet */ #define CP_NOP 0x10 -/* indirect buffer dispatch. prefetch parser uses this packet type to determine -* whether to pre-fetch the IB -*/ -#define CP_INDIRECT_BUFFER 0x3f - /* indirect buffer dispatch. same as IB, but init is pipelined */ #define CP_INDIRECT_BUFFER_PFD 0x37 @@ -117,6 +112,9 @@ /* load constants from a location in memory */ #define CP_LOAD_CONSTANT_CONTEXT 0x2e +/* (A2x) sets binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + /* selective invalidation of state pointers */ #define CP_INVALIDATE_STATE 0x3b @@ -157,6 +155,25 @@ #define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 /* packet header building macros */ #define cp_type0_packet(regindx, cnt) \ @@ -178,11 +195,20 @@ #define cp_nop_packet(cnt) \ (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8)) +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +#define pkt_is_type3(pkt) (((pkt) & 0xC0000000) == CP_TYPE3_PKT) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) /* packet headers */ #define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18) #define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) -#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2) +#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) /* dword base address of the GFX decode space */ #define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) @@ -190,4 +216,14 @@ /* gmem command buffer length */ #define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(unsigned int cmd) +{ + return (cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFD, 2)); +} + #endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c old mode 100644 new mode 100755 index 3d957f69..7e073fd9 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -52,7 +52,7 @@ static const struct pm_id_name pm3_types[] = { {CP_IM_LOAD, "IN__LOAD"}, {CP_IM_LOAD_IMMEDIATE, "IM_LOADI"}, {CP_IM_STORE, "IM_STORE"}, - {CP_INDIRECT_BUFFER, "IND_BUF_"}, + {CP_INDIRECT_BUFFER_PFE, "IND_BUF_"}, {CP_INDIRECT_BUFFER_PFD, "IND_BUFP"}, {CP_INTERRUPT, "PM4_INTR"}, {CP_INVALIDATE_STATE, "INV_STAT"}, @@ -247,9 +247,8 @@ static 
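/* Editorial worked example, not in the original patch: with the macros above, cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) expands to 0xC0000000 | ((2 - 1) << 16) | (0x37 << 8) = 0xC0013700; adreno_cmd_is_ib(0xC0013700) therefore returns 1, cp_type3_opcode() recovers 0x37, and type3_pkt_size() recovers 2, which is how the postmortem code below recognizes every IB variant. */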
void adreno_dump_regs(struct kgsl_device *device, static void dump_ib(struct kgsl_device *device, char* buffId, uint32_t pt_base, uint32_t base_offset, uint32_t ib_base, uint32_t ib_size, bool dump) { - unsigned int memsize; - uint8_t *base_addr = kgsl_sharedmem_convertaddr(device, pt_base, - ib_base, &memsize); + uint8_t *base_addr = adreno_convertaddr(device, pt_base, + ib_base, ib_size*sizeof(uint32_t)); if (base_addr && dump) print_hex_dump(KERN_ERR, buffId, DUMP_PREFIX_OFFSET, @@ -277,20 +276,19 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, int i, j; uint32_t value; uint32_t *ib1_addr; - unsigned int memsize; dump_ib(device, "IB1:", pt_base, base_offset, ib1_base, ib1_size, dump); /* fetch virtual address for given IB base */ - ib1_addr = (uint32_t *)kgsl_sharedmem_convertaddr(device, pt_base, - ib1_base, &memsize); + ib1_addr = (uint32_t *)adreno_convertaddr(device, pt_base, + ib1_base, ib1_size*sizeof(uint32_t)); if (!ib1_addr) return; for (i = 0; i+3 < ib1_size; ) { value = ib1_addr[i++]; - if (value == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(value)) { uint32_t ib2_base = ib1_addr[i++]; uint32_t ib2_size = ib1_addr[i++]; @@ -466,7 +464,7 @@ static int adreno_dump(struct kgsl_device *device) const uint32_t *rb_vaddr; int num_item = 0; int read_idx, write_idx; - unsigned int ts_processed, rb_memsize; + unsigned int ts_processed; static struct ib_list ib_list; @@ -681,11 +679,16 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "RB: rd_addr:%8.8x rb_size:%d num_item:%d\n", cp_rb_base, rb_count<<2, num_item); - rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, - cur_pt_base, cp_rb_base, &rb_memsize); + + if (adreno_dev->ringbuffer.buffer_desc.gpuaddr != cp_rb_base) + KGSL_LOG_POSTMORTEM_WRITE(device, + "rb address mismatch, should be 0x%08x\n", + adreno_dev->ringbuffer.buffer_desc.gpuaddr); + + rb_vaddr = adreno_dev->ringbuffer.buffer_desc.hostptr; if (!rb_vaddr) { KGSL_LOG_POSTMORTEM_WRITE(device, - "Can't fetch vaddr for CP_RB_BASE\n"); + "rb has no kernel mapping!\n"); goto error_vfree; } @@ -711,7 +714,7 @@ static int adreno_dump(struct kgsl_device *device) i = 0; for (read_idx = 0; read_idx < num_item; ) { uint32_t this_cmd = rb_copy[read_idx++]; - if (this_cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx++]; uint32_t ib_size = rb_copy[read_idx++]; dump_ib1(device, cur_pt_base, (read_idx-3)<<2, ib_addr, @@ -743,8 +746,7 @@ static int adreno_dump(struct kgsl_device *device) for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY; read_idx >= 0; --read_idx) { uint32_t this_cmd = rb_copy[read_idx]; - if (this_cmd == cp_type3_packet( - CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx+1]; uint32_t ib_size = rb_copy[read_idx+2]; if (ib_size && cp_ib1_base == ib_addr) { diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c old mode 100644 new mode 100755 index d59057c8..71f239cc --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,30 +22,14 @@ #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_ringbuffer.h" +#include "adreno_debugfs.h" #include "a2xx_reg.h" +#include "a3xx_reg.h" #define GSL_RB_NOP_SIZEDWORDS 2 -/* protected mode error checking below register address 0x800 -* note: if CP_INTERRUPT packet is used then checking needs -* to change to below register address 0x7C8 -*/ -#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 -/* Firmware file names - * Legacy names must remain but replacing macro names to - * match current kgsl model. - * a200 is yamato - * a220 is leia - */ -#define A200_PFP_FW "yamato_pfp.fw" -#define A200_PM4_FW "yamato_pm4.fw" -#define A220_PFP_470_FW "leia_pfp_470.fw" -#define A220_PM4_470_FW "leia_pm4_470.fw" -#define A225_PFP_FW "a225_pfp.fw" -#define A225_PM4_FW "a225_pm4.fw" - -static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) { BUG_ON(rb->wptr == 0); @@ -104,8 +88,7 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, } while ((freecmds != 0) && (freecmds <= numcmds)); } - -static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, unsigned int numcmds) { unsigned int *ptr = NULL; @@ -231,9 +214,10 @@ static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n", adreno_dev->pfp_fw[0]); - adreno_regwrite(device, REG_CP_PFP_UCODE_ADDR, 0); + adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0); for (i = 1; i < adreno_dev->pfp_fw_size; i++) - adreno_regwrite(device, REG_CP_PFP_UCODE_DATA, + adreno_regwrite(device, + adreno_dev->gpudev->reg_cp_pfp_ucode_data, adreno_dev->pfp_fw[i]); err: return ret; @@ -244,9 +228,9 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) int status; /*cp_rb_cntl_u cp_rb_cntl; */ union reg_cp_rb_cntl cp_rb_cntl; - unsigned int *cmds, rb_cntl; + unsigned int rb_cntl; struct kgsl_device *device = rb->device; - uint cmds_gpu; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); if (rb->flags & KGSL_FLAGS_STARTED) return 0; @@ -262,12 +246,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) kgsl_sharedmem_set(&rb->buffer_desc, 0, 0xAA, (rb->sizedwords << 2)); + if (adreno_is_a2xx(adreno_dev)) { adreno_regwrite(device, REG_CP_RB_WPTR_BASE, (rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_WPTRPOLL_OFFSET)); /* setup WPTR delay */ - adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, 0 /*0x70000010 */); + adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, + 0 /*0x70000010 */); + } /*setup REG_CP_RB_CNTL */ adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl); @@ -286,7 +273,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) */ cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3); - cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; /* WPTR polling */ + if (adreno_is_a2xx(adreno_dev)) { + /* WPTR polling */ + cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; + } + /* mem RPTR writebacks */ cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE; @@ -298,8 +289,36 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_RPTR_OFFSET); + if (adreno_is_a3xx(adreno_dev)) { + /* enable 
access protection to privileged registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_3, 0x60000108); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400); + + /* CP registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180); + + /* RB registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300); + + /* VBIF registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000); + } + + if (adreno_is_a2xx(adreno_dev)) { /* explicitly clear all cp interrupts */ adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF); + } /* setup scratch/timestamp */ adreno_regwrite(device, REG_SCRATCH_ADDR, @@ -309,6 +328,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) adreno_regwrite(device, REG_SCRATCH_UMSK, GSL_RB_MEMPTRS_SCRATCH_MASK); + /* update the eoptimestamp field with the last retired timestamp */ + kgsl_sharedmem_writel(&device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), + rb->timestamp); + /* load the CP ucode */ status = adreno_ringbuffer_load_pm4_ucode(device); @@ -328,54 +352,8 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) /* clear ME_HALT to start micro engine */ adreno_regwrite(device, REG_CP_ME_CNTL, 0); - /* ME_INIT */ - cmds = adreno_ringbuffer_allocspace(rb, 19); - cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); - - GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT); - /* All fields present (bits 9:0) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); - /* Disable/Enable Real-Time Stream processing (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); - - /* Vertex and Pixel Shader Start Addresses in instructions - * (3 DWORDS per instruction) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x80000180); - /* Maximum Contexts */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); - /* Write Confirm Interval and The CP will wait the - * wait_interval * 16 clocks between polling */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - /* NQ and External Memory Swap */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Protected mode error checking */ - GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); - /* Disable header dumping and Header dump address */ - GSL_RB_WRITE(cmds, cmds_gpu, 
0x00000000); - /* Header dump size */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - adreno_ringbuffer_submit(rb); + /* ME init is GPU specific, so jump into the sub-function */ + adreno_dev->gpudev->rb_init(adreno_dev, rb); /* idle device to validate ME INIT */ status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); @@ -391,7 +369,6 @@ void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) if (rb->flags & KGSL_FLAGS_STARTED) { /* ME_HALT */ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000); - rb->flags &= ~KGSL_FLAGS_STARTED; } } @@ -457,6 +434,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, unsigned int flags, unsigned int *cmds, int sizedwords) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; unsigned int total_sizedwords = sizedwords + 6; @@ -470,6 +448,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0; + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 7; + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); @@ -503,6 +484,21 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, /* start-of-pipeline and end-of-pipeline timestamps */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + + if (adreno_is_a3xx(adreno_dev)) { + /* + * Flush HLSQ lazy updates to make sure there are no + * resources pending for indirect loads after the timestamp + */ + + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); + } + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -526,6 +522,15 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_SET_CONSTANT, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, + (0x4<<16)|(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); + } + adreno_ringbuffer_submit(rb); /* return timestamp of issued commands */ @@ -546,6 +551,198 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords); } +static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, + int sizedwords); + +static bool +_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int opcode = cp_type3_opcode(*hostaddr); + switch (opcode) { + case CP_INDIRECT_BUFFER_PFD: + case CP_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFD: + return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]); + case CP_NOP: + case CP_WAIT_FOR_IDLE: + case CP_WAIT_REG_MEM: + case CP_WAIT_REG_EQ: + case CP_WAT_REG_GTE: + case CP_WAIT_UNTIL_READ: + case CP_WAIT_IB_PFD_COMPLETE: + case CP_REG_RMW: + case CP_REG_TO_MEM: + case CP_MEM_WRITE: + case CP_MEM_WRITE_CNTR: + case CP_COND_EXEC: + case CP_COND_WRITE: + case CP_EVENT_WRITE: + case CP_EVENT_WRITE_SHD: + case CP_EVENT_WRITE_CFL: +
case CP_EVENT_WRITE_ZPD: + case CP_DRAW_INDX: + case CP_DRAW_INDX_2: + case CP_DRAW_INDX_BIN: + case CP_DRAW_INDX_2_BIN: + case CP_VIZ_QUERY: + case CP_SET_STATE: + case CP_SET_CONSTANT: + case CP_IM_LOAD: + case CP_IM_LOAD_IMMEDIATE: + case CP_LOAD_CONSTANT_CONTEXT: + case CP_INVALIDATE_STATE: + case CP_SET_SHADER_BASES: + case CP_SET_BIN_MASK: + case CP_SET_BIN_SELECT: + case CP_SET_BIN_BASE_OFFSET: + case CP_SET_BIN_DATA: + case CP_CONTEXT_UPDATE: + case CP_INTERRUPT: + case CP_IM_STORE: + case CP_LOAD_STATE: + break; + /* these shouldn't come from userspace */ + case CP_ME_INIT: + case CP_SET_PROTECTED_MODE: + default: + KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode); + return false; + break; + } + + return true; +} + +static bool +_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int reg = type0_pkt_offset(*hostaddr); + unsigned int cnt = type0_pkt_size(*hostaddr); + if (reg < 0x0192 || (reg + cnt) >= 0x8000) { + KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n", + reg, cnt); + return false; + } + return true; +} + +/* + * Traverse IBs and dump them to test vector. Detect swap by inspecting + * register writes, keeping note of the current state, and dump + * framebuffer config to test vector + */ +static bool _parse_ibs(struct kgsl_device_private *dev_priv, + uint gpuaddr, int sizedwords) +{ + static uint level; /* recursion level */ + bool ret = false; + uint *hostaddr, *hoststart; + int dwords_left = sizedwords; /* dwords left in the current command + buffer */ + struct kgsl_mem_entry *entry; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find_region(dev_priv->process_priv, + gpuaddr, sizedwords * sizeof(uint)); + spin_unlock(&dev_priv->process_priv->mem_lock); + if (entry == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (hostaddr == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hoststart = hostaddr; + + level++; + + KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", + gpuaddr, sizedwords, hostaddr); + + mb(); + while (dwords_left > 0) { + bool cur_ret = true; + int count = 0; /* dword count including packet header */ + + switch (*hostaddr >> 30) { + case 0x0: /* type-0 */ + count = (*hostaddr >> 16)+2; + cur_ret = _handle_type0(dev_priv, hostaddr); + break; + case 0x1: /* type-1 */ + count = 2; + break; + case 0x3: /* type-3 */ + count = ((*hostaddr >> 16) & 0x3fff) + 2; + cur_ret = _handle_type3(dev_priv, hostaddr); + break; + default: + KGSL_CMD_ERR(dev_priv->device, "unexpected type: " + "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", + *hostaddr >> 30, *hostaddr, hostaddr, + gpuaddr+4*(sizedwords-dwords_left)); + cur_ret = false; + count = dwords_left; + break; + } + + if (!cur_ret) { + KGSL_CMD_ERR(dev_priv->device, + "bad sub-type: #:%d/%d, v:0x%08x" + " @ 0x%p[gb:0x%08x], level:%d\n", + sizedwords-dwords_left, sizedwords, *hostaddr, + hostaddr, gpuaddr+4*(sizedwords-dwords_left), + level); + + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? 
"IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + + /* jump to next packet */ + dwords_left -= count; + hostaddr += count; + if (dwords_left < 0) { + KGSL_CMD_ERR(dev_priv->device, + "bad count: c:%d, #:%d/%d, " + "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", + count, sizedwords-(dwords_left+count), + sizedwords, *(hostaddr-count), hostaddr-count, + gpuaddr+4*(sizedwords-(dwords_left+count)), + level); + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + } + + ret = true; +done: + if (!ret) + KGSL_DRV_ERR(dev_priv->device, + "parsing failed: gpuaddr:0x%08x, " + "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); + + level--; + + return ret; +} + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, @@ -560,6 +757,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int *cmds; unsigned int i; struct adreno_context *drawctxt; + unsigned int start_index = 0; if (device->state & KGSL_STATE_HUNG) return -EBUSY; @@ -575,22 +773,48 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, drawctxt); return -EDEADLK; } - link = kzalloc(sizeof(unsigned int) * numibs * 3, GFP_KERNEL); - cmds = link; + + cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4), + GFP_KERNEL); if (!link) { - KGSL_MEM_ERR(device, "Failed to allocate memory for for command" - " submission, size %x\n", numibs * 3); + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(unsigned int) * (numibs * 3 + 4)); return -ENOMEM; } - for (i = 0; i < numibs; i++) { - (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); + /*When preamble is enabled, the preamble buffer with state restoration + commands are stored in the first node of the IB chain. We can skip that + if a context switch hasn't occured */ + + if (drawctxt->flags & CTXT_FLAGS_PREAMBLE && + adreno_dev->drawctxt_active == drawctxt) + start_index = 1; + + if (!start_index) { + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + } else { + *cmds++ = cp_nop_packet(4); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; + *cmds++ = ibdesc[0].gpuaddr; + *cmds++ = ibdesc[0].sizedwords; + } + for (i = start_index; i < numibs; i++) { + if (unlikely(adreno_dev->ib_check_level >= 1 && + !_parse_ibs(dev_priv, ibdesc[i].gpuaddr, + ibdesc[i].sizedwords))) { + kfree(link); + return -EINVAL; + } *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; } + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + kgsl_setstate(device, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); @@ -632,7 +856,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, unsigned int val3; unsigned int copy_rb_contents = 0; unsigned int cur_context; - unsigned int j; GSL_RB_GET_READPTR(rb, &rb->rptr); @@ -739,8 +962,20 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); - BUG_ON((copy_rb_contents == 0) && - (value == cur_context)); + + /* + * If other context switches were already lost and + * and the current context is the one that is hanging, + * then we cannot recover. Print an error message + * and leave. 
+ */ + + if ((copy_rb_contents == 0) && (value == cur_context)) { + KGSL_DRV_ERR(device, "GPU recovery could not " + "find the previous context\n"); + return -EINVAL; + } + /* * If we were copying the commands and got to this point * then we need to remove the 3 commands that appear @@ -771,19 +1006,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, } *rb_size = temp_idx; - KGSL_DRV_ERR(device, "Extracted rb contents, size: %x\n", *rb_size); - for (temp_idx = 0; temp_idx < *rb_size;) { - char str[80]; - int idx = 0; - if ((temp_idx + 8) <= *rb_size) - j = 8; - else - j = *rb_size - temp_idx; - for (; j != 0; j--) - idx += scnprintf(str + idx, 80 - idx, - "%8.8X ", temp_rb_buffer[temp_idx++]); - printk(KERN_ALERT "%s", str); - } return 0; } diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h old mode 100644 new mode 100755 index 3e7a6880..d0110b9f --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -14,10 +13,6 @@ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H -#define GSL_RB_USE_MEM_RPTR -#define GSL_RB_USE_MEM_TIMESTAMP -#define GSL_DEVICE_SHADOW_MEMSTORE_TO_USER - /* * Adreno ringbuffer sizes in bytes - these are converted to * the appropriate log2 values in the code @@ -62,49 +57,36 @@ struct adreno_ringbuffer { uint32_t timestamp; }; + #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ - writel_relaxed(data, ring); \ + *ring = data; \ wmb(); \ kgsl_cffdump_setmem(gpuaddr, data, 4); \ ring++; \ gpuaddr += sizeof(uint); \ } while (0) -/* timestamp */ -#ifdef GSL_DEVICE_SHADOW_MEMSTORE_TO_USER -#define GSL_RB_USE_MEM_TIMESTAMP -#endif /* GSL_DEVICE_SHADOW_MEMSTORE_TO_USER */ - -#ifdef GSL_RB_USE_MEM_TIMESTAMP /* enable timestamp (...scratch0) memory shadowing */ #define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 #define GSL_RB_INIT_TIMESTAMP(rb) -#else -#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x0 -#define GSL_RB_INIT_TIMESTAMP(rb) \ - adreno_regwrite((rb)->device->id, REG_CP_TIMESTAMP, 0) - -#endif /* GSL_RB_USE_MEMTIMESTAMP */ - /* mem rptr */ -#ifdef GSL_RB_USE_MEM_RPTR #define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */ #define GSL_RB_GET_READPTR(rb, data) \ do { \ - *(data) = readl_relaxed(&(rb)->memptrs->rptr); \ + *(data) = rb->memptrs->rptr; \ } while (0) -#else -#define GSL_RB_CNTL_NO_UPDATE 0x1 /* disable */ -#define GSL_RB_GET_READPTR(rb, data) \ - do { \ - adreno_regread((rb)->device->id, REG_CP_RB_RPTR, (data)); \ - } while (0) -#endif /* GSL_RB_USE_MEMRPTR */ #define GSL_RB_CNTL_POLL_EN 0x0 /* disable */ +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, @@ -126,6 +108,8 @@ void adreno_ringbuffer_issuecmds(struct kgsl_device *device, unsigned int *cmdaddr, int sizedwords); +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb); + void kgsl_cp_intrcallback(struct kgsl_device *device); int adreno_ringbuffer_extract(struct 
adreno_ringbuffer *rb, @@ -136,6 +120,9 @@ void adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, int num_rb_contents); +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, unsigned int rptr) { diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c old mode 100644 new mode 100755 index e21ca09c..e7c4ff8b --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,6 +24,7 @@ #include #include +#include #include "kgsl.h" #include "kgsl_debugfs.h" @@ -45,7 +45,7 @@ module_param_named(mmutype, ksgl_mmu_type, charp, 0); MODULE_PARM_DESC(ksgl_mmu_type, "Type of MMU to be used for graphics. Valid values are 'iommu' or 'gpummu' or 'nommu'"); -#ifdef CONFIG_GENLOCK +static struct ion_client *kgsl_ion_client; /** * kgsl_add_event - Add a new timestamp event for the KGSL device @@ -53,12 +53,14 @@ MODULE_PARM_DESC(ksgl_mmu_type, * @ts - the timestamp to trigger the event on * @cb - callback function to call when the timestamp expires * @priv - private data for the specific event type + * @owner - driver instance that owns this event * * @returns - 0 on success or error code on failure */ static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv) + void (*cb)(struct kgsl_device *, void *, u32), void *priv, + struct kgsl_device_private *owner) { struct kgsl_event *event; struct list_head *n; @@ -82,6 +84,7 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, event->timestamp = ts; event->priv = priv; event->func = cb; + event->owner = owner; /* Add the event in order to the list */ @@ -97,10 +100,73 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (n == &device->events) list_add_tail(&event->list, &device->events); - + + queue_work(device->work_queue, &device->ts_expired_ws); return 0; } -#endif + +/** + * kgsl_cancel_events - Cancel all events for a process + * @device - KGSL device for the events to cancel + * @owner - driver instance that owns the events to cancel + * + */ +static void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_device_private *owner) +{ + struct kgsl_event *event, *event_tmp; + unsigned int cur = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + + list_for_each_entry_safe(event, event_tmp, &device->events, list) { + if (event->owner != owner) + continue; + /* + * "cancel" the events by calling their callback. + * Currently, events are used for lock and memory + * management, so if the process is dying the right + * thing to do is release or free.
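+ *
+ * (Editor's note, illustrative only: "owner" is the struct
+ * kgsl_device_private pointer the event was queued with, so the
+ * add/cancel pairing looks like:
+ *
+ *   kgsl_add_event(device, ts, callback, priv, dev_priv);
+ *   ...
+ *   kgsl_cancel_events(device, dev_priv);
+ *
+ * Each cancelled callback is invoked with the currently retired
+ * timestamp, so a pending free-on-timestamp behaves as if its
+ * timestamp had just expired.)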
+ */ + if (event->func) + event->func(device, event->priv, cur); + + list_del(&event->list); + kfree(event); + } +} + +/* kgsl_get_mem_entry - get the mem_entry structure for the specified object + * @ptbase - the pagetable base of the object + * @gpuaddr - the GPU address of the object + * @size - Size of the region to search + */ + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_process_private *priv; + struct kgsl_mem_entry *entry; + + mutex_lock(&kgsl_driver.process_mutex); + + list_for_each_entry(priv, &kgsl_driver.process_list, list) { + if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase)) + continue; + spin_lock(&priv->mem_lock); + entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); + + if (entry) { + spin_unlock(&priv->mem_lock); + mutex_unlock(&kgsl_driver.process_mutex); + return entry; + } + spin_unlock(&priv->mem_lock); + } + mutex_unlock(&kgsl_driver.process_mutex); + + return NULL; +} +EXPORT_SYMBOL(kgsl_get_mem_entry); static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) @@ -121,18 +187,32 @@ kgsl_mem_entry_destroy(struct kref *kref) struct kgsl_mem_entry *entry = container_of(kref, struct kgsl_mem_entry, refcount); - size_t size = entry->memdesc.size; + + if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) + kgsl_driver.stats.mapped -= entry->memdesc.size; + + /* + * Ion takes care of freeing the sglist for us (how nice ) so + * unmap the dma before freeing the sharedmem so kgsl_sharedmem_free + * doesn't try to free it again + */ + + if (entry->memtype == KGSL_MEM_ENTRY_ION) { + ion_unmap_dma(kgsl_ion_client, entry->priv_data); + entry->memdesc.sg = NULL; + } kgsl_sharedmem_free(&entry->memdesc); - if (entry->memtype == KGSL_USER_MEMORY) - entry->priv->stats.user -= size; - else if (entry->memtype == KGSL_MAPPED_MEMORY) { - if (entry->file_ptr) - fput(entry->file_ptr); - - kgsl_driver.stats.mapped -= size; - entry->priv->stats.mapped -= size; + switch (entry->memtype) { + case KGSL_MEM_ENTRY_PMEM: + case KGSL_MEM_ENTRY_ASHMEM: + if (entry->priv_data) + fput(entry->priv_data); + break; + case KGSL_MEM_ENTRY_ION: + ion_free(kgsl_ion_client, entry->priv_data); + break; } kfree(entry); @@ -150,6 +230,21 @@ void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, entry->priv = process; } +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; + entry->priv = NULL; + + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_put(entry); +} + /* Allocate a new context id */ static struct kgsl_context * @@ -206,44 +301,10 @@ kgsl_destroy_context(struct kgsl_device_private *dev_priv, idr_remove(&dev_priv->device->context_idr, id); } -/* to be called when a process is destroyed, this walks the memqueue and - * frees any entryies that belong to the dying process - */ -static void kgsl_memqueue_cleanup(struct kgsl_device *device, - struct kgsl_process_private *private) -{ - struct kgsl_mem_entry *entry, *entry_tmp; - - if (!private) - return; - - BUG_ON(!mutex_is_locked(&device->mutex)); - - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (entry->priv == private) { - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - } -} - -static void kgsl_memqueue_freememontimestamp(struct kgsl_device *device, - struct kgsl_mem_entry *entry, - uint32_t timestamp, - 
enum kgsl_timestamp_type type) -{ - BUG_ON(!mutex_is_locked(&device->mutex)); - - entry->free_timestamp = timestamp; - - list_add_tail(&entry->list, &device->memqueue); -} - static void kgsl_timestamp_expired(struct work_struct *work) { struct kgsl_device *device = container_of(work, struct kgsl_device, ts_expired_ws); - struct kgsl_mem_entry *entry, *entry_tmp; struct kgsl_event *event, *event_tmp; uint32_t ts_processed; @@ -253,15 +314,6 @@ static void kgsl_timestamp_expired(struct work_struct *work) ts_processed = device->ftbl->readtimestamp(device, KGSL_TIMESTAMP_RETIRED); - /* Flush the freememontimestamp queue */ - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (timestamp_cmp(ts_processed, entry->free_timestamp) < 0) - break; - - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - /* Process expired events */ list_for_each_entry_safe(event, event_tmp, &device->events, list) { if (timestamp_cmp(ts_processed, event->timestamp) < 0) @@ -585,18 +637,13 @@ kgsl_put_process_private(struct kgsl_device *device, if (--private->refcnt) goto unlock; - KGSL_MEM_INFO(device, - "Memory usage: user (%d/%d) mapped (%d/%d)\n", - private->stats.user, private->stats.user_max, - private->stats.mapped, private->stats.mapped_max); - kgsl_process_uninit_sysfs(private); list_del(&private->list); list_for_each_entry_safe(entry, entry_tmp, &private->mem_list, list) { list_del(&entry->list); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } kgsl_mmu_putpagetable(private->pagetable); @@ -641,7 +688,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) /* clean up any to-be-freed entries that belong to this * process and this device */ - kgsl_memqueue_cleanup(device, private); + kgsl_cancel_events(device, dev_priv); mutex_unlock(&device->mutex); kfree(dev_priv); @@ -758,9 +805,7 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, BUG_ON(private == NULL); list_for_each_entry(entry, &private->mem_list, list) { - if (gpuaddr >= entry->memdesc.gpuaddr && - ((gpuaddr + size) <= - (entry->memdesc.gpuaddr + entry->memdesc.size))) { + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { result = entry; break; } @@ -770,20 +815,6 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, } EXPORT_SYMBOL(kgsl_sharedmem_find_region); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size) -{ - BUG_ON(memdesc->hostptr == NULL); - - if (memdesc->gpuaddr == 0 || (gpuaddr < memdesc->gpuaddr || - gpuaddr >= memdesc->gpuaddr + memdesc->size)) - return NULL; - - *size = memdesc->size - (gpuaddr - memdesc->gpuaddr); - return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); -} -EXPORT_SYMBOL(kgsl_gpuaddr_to_vaddr); - /*call all ioctl sub functions with driver locked*/ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -810,6 +841,40 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, break; } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_find_context(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * 
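(Editor's note, hypothetical usage sketch added for clarity: userspace
+ * queries this property with IOCTL_KGSL_DEVICE_GETPROPERTY, passing the
+ * context id in the same buffer that receives the status:
+ *
+ *   unsigned int val = ctx_id;
+ *   struct kgsl_device_getproperty prop = {
+ *       .type = KGSL_PROP_GPU_RESET_STAT,
+ *       .value = &val,
+ *       .sizebytes = sizeof(val),
+ *   };
+ *   ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &prop);
+ *
+ * On return, val holds the queried context's reset status.)
+ *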
Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + /* Clear reset status once it's been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + break; + } default: result = dev_priv->device->ftbl->getproperty( dev_priv->device, param->type, param->value, param->sizebytes); @@ -844,40 +909,6 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private return result; } -static bool check_ibdesc(struct kgsl_device_private *dev_priv, - struct kgsl_ibdesc *ibdesc, unsigned int numibs, - bool parse) -{ - bool result = true; - unsigned int i; - for (i = 0; i < numibs; i++) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d\n", ibdesc[i].gpuaddr, - ibdesc[i].sizedwords); - result = false; - break; - } - - if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d numibs %d/%d\n", - ibdesc[i].gpuaddr, - ibdesc[i].sizedwords, i+1, numibs); - result = false; - break; - } - } - return result; -} static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -947,12 +978,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, param->numibs = 1; } - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc"); - result = -EINVAL; - goto free_ibdesc; - } - result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, ibdesc, @@ -960,17 +985,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, param->numibs, &param->timestamp, param->flags); - if (result != 0) - goto free_ibdesc; - - /* this is a check to try to detect if a command buffer was freed - * during issueibcmds().
- */ - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue"); - result = -EINVAL; - goto free_ibdesc; - } free_ibdesc: kfree(ibdesc); @@ -996,6 +1010,16 @@ static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private return 0; } +static void kgsl_freemem_event_cb(struct kgsl_device *device, + void *priv, u32 timestamp) +{ + struct kgsl_mem_entry *entry = priv; + spin_lock(&entry->priv->mem_lock); + list_del(&entry->list); + spin_unlock(&entry->priv->mem_lock); + kgsl_mem_entry_detach_process(entry); +} + static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -1006,13 +1030,11 @@ static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private spin_lock(&dev_priv->process_priv->mem_lock); entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr); - if (entry) - list_del(&entry->list); spin_unlock(&dev_priv->process_priv->mem_lock); if (entry) { - kgsl_memqueue_freememontimestamp(dev_priv->device, entry, - param->timestamp, param->type); + result = kgsl_add_event(dev_priv->device, param->timestamp, + kgsl_freemem_event_cb, entry, dev_priv); } else { KGSL_DRV_ERR(dev_priv->device, "invalid gpuaddr %08x\n", param->gpuaddr); @@ -1089,7 +1111,7 @@ static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, spin_unlock(&private->mem_lock); if (entry) { - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } else { KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr); result = -EINVAL; @@ -1169,7 +1191,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, goto error; } - result = kgsl_sharedmem_vmalloc_user(&entry->memdesc, + result = kgsl_sharedmem_page_alloc_user(&entry->memdesc, private->pagetable, len, param->flags); if (result != 0) @@ -1177,26 +1199,25 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - result = remap_vmalloc_range(vma, (void *) entry->memdesc.hostptr, 0); + result = kgsl_sharedmem_map_vma(vma, &entry->memdesc); if (result) { - KGSL_CORE_ERR("remap_vmalloc_range failed: %d\n", result); - goto error_free_vmalloc; + KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result); + goto error_free_alloc; } param->gpuaddr = entry->memdesc.gpuaddr; - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); /* Process specific statistics */ - KGSL_STATS_ADD(len, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, len); kgsl_check_idle(dev_priv->device); return 0; -error_free_vmalloc: +error_free_alloc: kgsl_sharedmem_free(&entry->memdesc); error_free_entry: @@ -1292,7 +1313,7 @@ static int kgsl_setup_phys_file(struct kgsl_mem_entry *entry, } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = size; @@ -1319,8 +1340,8 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; unsigned long paddr = (unsigned long) addr; - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), - GFP_KERNEL); + memdesc->sg = kgsl_sg_alloc(sglen); + if (memdesc->sg == NULL) return -ENOMEM; @@ -1360,7 +1381,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, err: spin_unlock(¤t->mm->page_table_lock); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, sglen); memdesc->sg 
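/*
 * Editor's note (assumption, not taken from this patch): the
 * kgsl_sg_alloc()/kgsl_sg_free() helpers introduced above plausibly
 * exist so that very large scatterlists for big user mappings need
 * not come from kmalloc's physically contiguous pool. A likely shape:
 *
 *   struct scatterlist *kgsl_sg_alloc(unsigned int sglen)
 *   {
 *       // small lists fit in a slab allocation...
 *       if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE)
 *           return kzalloc(sglen * sizeof(struct scatterlist),
 *                          GFP_KERNEL);
 *       // ...large ones fall back to vmalloc
 *       return vmalloc(sglen * sizeof(struct scatterlist));
 *   }
 *
 * with kgsl_sg_free() choosing kfree() or vfree() to match.
 */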
= NULL; return -EINVAL; @@ -1464,7 +1485,7 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, goto err; } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = ALIGN(size, PAGE_SIZE); entry->memdesc.hostptr = hostptr; @@ -1488,6 +1509,51 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, } #endif +static int kgsl_setup_ion(struct kgsl_mem_entry *entry, + struct kgsl_pagetable *pagetable, int fd) +{ + struct ion_handle *handle; + struct scatterlist *s; + unsigned long flags; + + if (kgsl_ion_client == NULL) { + kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME); + if (kgsl_ion_client == NULL) + return -ENODEV; + } + + handle = ion_import_fd(kgsl_ion_client, fd); + if (IS_ERR_OR_NULL(handle)) + return PTR_ERR(handle); + + entry->memtype = KGSL_MEM_ENTRY_ION; + entry->priv_data = handle; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + + if (ion_handle_get_flags(kgsl_ion_client, handle, &flags)) + goto err; + + entry->memdesc.sg = ion_map_dma(kgsl_ion_client, handle, flags); + + if (IS_ERR_OR_NULL(entry->memdesc.sg)) + goto err; + + /* Calculate the size of the memdesc from the sglist */ + + entry->memdesc.sglen = 0; + + for (s = entry->memdesc.sg; s != NULL; s = sg_next(s)) { + entry->memdesc.size += s->length; + entry->memdesc.sglen++; + } + + return 0; +err: + ion_free(kgsl_ion_client, handle); + return -ENOMEM; +} + static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -1515,6 +1581,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_phys_file(entry, private->pagetable, param->fd, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_PMEM; break; case KGSL_USER_MEM_TYPE_ADDR: @@ -1531,6 +1598,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_hostptr(entry, private->pagetable, (void *) param->hostptr, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_USER; break; case KGSL_USER_MEM_TYPE_ASHMEM: @@ -1547,6 +1615,12 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_ashmem(entry, private->pagetable, param->fd, (void *) param->hostptr, param->len); + + entry->memtype = KGSL_MEM_ENTRY_ASHMEM; + break; + case KGSL_USER_MEM_TYPE_ION: + result = kgsl_setup_ion(entry, private->pagetable, + param->fd); break; default: KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); @@ -1566,14 +1640,10 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, /* Adjust the returned value for a non 4k aligned offset */ param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK); - entry->memtype = KGSL_MAPPED_MEMORY; - KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped, kgsl_driver.stats.mapped_max); - /* Statistics */ - KGSL_STATS_ADD(param->len, private->stats.mapped, - private->stats.mapped_max); + kgsl_process_add_stats(private, entry->memtype, param->len); kgsl_mem_entry_attach_process(entry, private); @@ -1581,8 +1651,8 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, return result; error_put_file_ptr: - if (entry->file_ptr) - fput(entry->file_ptr); + if (entry->priv_data) + fput(entry->priv_data); error: kfree(entry); @@ -1608,10 +1678,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, result = -EINVAL; goto done; } - if (!entry->memdesc.hostptr) - entry->memdesc.hostptr = - 
kgsl_gpuaddr_to_vaddr(&entry->memdesc, - param->gpuaddr, &entry->memdesc.size); if (!entry->memdesc.hostptr) { KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n", @@ -1620,9 +1686,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, } kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN); - - /* Statistics - keep track of how many flushes each process does */ - private->stats.flushes++; done: spin_unlock(&private->mem_lock); return result; @@ -1645,12 +1708,11 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, param->size, param->flags); if (result == 0) { - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); param->gpuaddr = entry->memdesc.gpuaddr; - KGSL_STATS_ADD(entry->memdesc.size, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, param->size); } else kfree(entry); @@ -1724,6 +1786,7 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, * @timestamp - Timestamp to trigger the event * @data - User space buffer containing struct kgsl_genlock_event_priv * @len - length of the userspace buffer + * @owner - driver instance that owns this event * @returns 0 on success or error code on error * * Attach to a genlock handle and register an event to release the @@ -1731,7 +1794,8 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, */ static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { struct kgsl_genlock_event_priv *event; struct kgsl_timestamp_event_genlock priv; @@ -1756,7 +1820,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, return ret; } - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event, + owner); if (ret) kfree(event); @@ -1764,7 +1829,8 @@ } #else static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { return -EINVAL; } @@ -1787,7 +1853,8 @@ static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, switch (param->type) { case KGSL_TIMESTAMP_EVENT_GENLOCK: ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len); + param->timestamp, param->priv, param->len, + dev_priv); break; default: ret = -EINVAL; @@ -1892,7 +1959,7 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (!func) { KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - ret = -EINVAL; + ret = -ENOIOCTLCMD; goto done; } lock = 1; @@ -2142,7 +2209,6 @@ kgsl_register_device(struct kgsl_device *device) INIT_WORK(&device->idle_check_ws, kgsl_idle_check); INIT_WORK(&device->ts_expired_ws, kgsl_timestamp_expired); - INIT_LIST_HEAD(&device->memqueue); INIT_LIST_HEAD(&device->events); ret = kgsl_mmu_init(device); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h old mode 100644 new mode 100755 index e26cdc9e..3f9ff843 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -21,9 +21,13 @@ #include #include #include +#include #define KGSL_NAME "kgsl" +/* Timestamp window used to detect rollovers */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + /*cache coherency ops */ #define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002 @@ -91,6 +95,8 @@ struct kgsl_driver { struct { unsigned int vmalloc; unsigned int vmalloc_max; + unsigned int page_alloc; + unsigned int page_alloc_max; unsigned int coherent; unsigned int coherent_max; unsigned int mapped; @@ -101,11 +107,18 @@ struct kgsl_driver { extern struct kgsl_driver kgsl_driver; -#define KGSL_USER_MEMORY 1 -#define KGSL_MAPPED_MEMORY 2 - struct kgsl_pagetable; -struct kgsl_memdesc_ops; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + int (*vmflags)(struct kgsl_memdesc *); + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel_mem)(struct kgsl_memdesc *); +}; + +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) /* shared memory allocation */ struct kgsl_memdesc { @@ -118,13 +131,23 @@ struct kgsl_memdesc { struct scatterlist *sg; unsigned int sglen; struct kgsl_memdesc_ops *ops; + int flags; }; +/* List of different memory entry types */ + +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_PMEM 1 +#define KGSL_MEM_ENTRY_ASHMEM 2 +#define KGSL_MEM_ENTRY_USER 3 +#define KGSL_MEM_ENTRY_ION 4 +#define KGSL_MEM_ENTRY_MAX 5 + struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; int memtype; - struct file *file_ptr; + void *priv_data; struct list_head list; uint32_t free_timestamp; /* back pointer to private structure under whose context this @@ -139,8 +162,10 @@ struct kgsl_mem_entry { #endif void kgsl_mem_entry_destroy(struct kref *kref); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size); + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size); + struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); @@ -169,14 +194,26 @@ static inline void kgsl_drm_exit(void) #endif static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr) + unsigned int gpuaddr, unsigned int size) { - if (gpuaddr >= memdesc->gpuaddr && (gpuaddr + sizeof(unsigned int)) <= - (memdesc->gpuaddr + memdesc->size)) { + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { return 1; } return 0; } +static inline uint8_t *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + unsigned int gpuaddr) +{ + if (memdesc->gpuaddr == 0 || + gpuaddr < memdesc->gpuaddr || + gpuaddr >= (memdesc->gpuaddr + memdesc->size) || + (NULL == memdesc->hostptr && memdesc->ops->map_kernel_mem && + memdesc->ops->map_kernel_mem(memdesc))) + return NULL; + + return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); +} static inline int timestamp_cmp(unsigned int new, unsigned int old) { @@ -185,7 +222,7 @@ static inline int timestamp_cmp(unsigned int new, unsigned int old) if (ts_diff == 0) return 0; - return ((ts_diff > 0) || (ts_diff < -20000)) ? 1 : -1; + return ((ts_diff > 0) || (ts_diff < -KGSL_TIMESTAMP_WINDOW)) ? 
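/*
 * Editor's note (illustrative, added for clarity): timestamps are
 * unsigned and wrap around, so ordering is decided by the signed
 * difference plus a half-range window. With
 * KGSL_TIMESTAMP_WINDOW == 0x80000000:
 *
 *   new = 0x00000005, old = 0xFFFFFFFB -> ts_diff =  10 -> returns  1
 *   new = 0xFFFFFFFB, old = 0x00000005 -> ts_diff = -10 -> returns -1
 *
 * The old hard-coded -20000 window treated any backwards gap larger
 * than 20000 as a rollover; the half-range window only does so once
 * the counters diverge by 2^31.
 */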
1 : -1; } static inline void diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c old mode 100644 new mode 100755 index aa33152c..1ab8908f --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -20,7 +20,7 @@ #include #include #include -#include +//#include #include "kgsl.h" #include "kgsl_cffdump.h" @@ -231,8 +231,6 @@ static void cffdump_printline(int id, uint opcode, uint op1, uint op2, spin_lock(&cffdump_lock); if (opcode == CFF_OP_WRITE_MEM) { - if (op1 < 0x40000000 || op1 >= 0x60000000) - KGSL_CORE_ERR("addr out-of-range: op1=%08x", op1); if ((cff_op_write_membuf.addr != op1 && cff_op_write_membuf.count) || (cff_op_write_membuf.count == MEMBUF_SIZE)) @@ -360,15 +358,7 @@ void kgsl_cffdump_destroy() void kgsl_cffdump_open(enum kgsl_deviceid device_id) { - /*TODO: move this to where we can report correct gmemsize*/ - unsigned int va_base; - - if (cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930()) - va_base = 0x40000000; - else - va_base = 0x20000000; - - kgsl_cffdump_memory_base(device_id, va_base, + kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE, CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, SZ_256K); } @@ -401,8 +391,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, bool clean_cache) { const void *src; - uint host_size; - uint physaddr; if (!kgsl_cff_dump_enable) return; @@ -422,13 +410,9 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, } memdesc = &entry->memdesc; } - BUG_ON(memdesc->gpuaddr == 0); - BUG_ON(gpuaddr == 0); - physaddr = kgsl_get_realaddr(memdesc) + (gpuaddr - memdesc->gpuaddr); - - src = kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (src == NULL || host_size < sizebytes) { - KGSL_CORE_ERR("did not find mapping for " + src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); + if (memdesc->hostptr == NULL) { + KGSL_CORE_ERR("no kernel mapping for " "gpuaddr: 0x%08x, m->host: 0x%p, phys: 0x%08x\n", gpuaddr, memdesc->hostptr, memdesc->physaddr); return; @@ -444,7 +428,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, KGSL_CACHE_OP_INV); } - BUG_ON(physaddr > 0x66000000 && physaddr < 0x66ffffff); while (sizebytes > 3) { cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src, 0, 0, 0); @@ -462,7 +445,6 @@ void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes) if (!kgsl_cff_dump_enable) return; - BUG_ON(addr > 0x66000000 && addr < 0x66ffffff); while (sizebytes > 3) { /* Use 32bit memory writes as long as there's at least * 4 bytes left */ @@ -515,197 +497,6 @@ int kgsl_cffdump_waitirq(void) } EXPORT_SYMBOL(kgsl_cffdump_waitirq); -#define ADDRESS_STACK_SIZE 256 -#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF) -static unsigned int kgsl_cffdump_addr_count; - -static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv, - uint *hostaddr, bool check_only) -{ - static uint addr_stack[ADDRESS_STACK_SIZE]; - static uint size_stack[ADDRESS_STACK_SIZE]; - - switch (GET_PM4_TYPE3_OPCODE(hostaddr)) { - case CP_INDIRECT_BUFFER_PFD: - case CP_INDIRECT_BUFFER: - { - /* traverse indirect buffers */ - int i; - uint ibaddr = hostaddr[1]; - uint ibsize = hostaddr[2]; - - /* is this address already in encountered? 
*/ - for (i = 0; - i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr; - ++i) - ; - - if (kgsl_cffdump_addr_count == i) { - addr_stack[kgsl_cffdump_addr_count] = ibaddr; - size_stack[kgsl_cffdump_addr_count++] = ibsize; - - if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) { - KGSL_CORE_ERR("stack overflow\n"); - return false; - } - - return kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibaddr, ibsize, check_only); - } else if (size_stack[i] != ibsize) { - KGSL_CORE_ERR("gpuaddr: 0x%08x, " - "wc: %u, with size wc: %u already on the " - "stack\n", ibaddr, ibsize, size_stack[i]); - return false; - } - } - break; - } - - return true; -} - -/* - * Traverse IBs and dump them to test vector. Detect swap by inspecting - * register writes, keeping note of the current state, and dump - * framebuffer config to test vector - */ -bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, - const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords, - bool check_only) -{ - static uint level; /* recursion level */ - bool ret = true; - uint host_size; - uint *hostaddr, *hoststart; - int dwords_left = sizedwords; /* dwords left in the current command - buffer */ - - if (level == 0) - kgsl_cffdump_addr_count = 0; - - if (memdesc == NULL) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - gpuaddr, sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_CORE_ERR("did not find mapping " - "for gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - memdesc = &entry->memdesc; - } - - hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (hostaddr == NULL) { - KGSL_CORE_ERR("did not find mapping for " - "gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - - hoststart = hostaddr; - - level++; - - if (!memdesc->physaddr) { - KGSL_CORE_ERR("no physaddr"); - } else { - mb(); - kgsl_cache_range_op((struct kgsl_memdesc *)memdesc, - KGSL_CACHE_OP_INV); - } - -#ifdef DEBUG - pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", - gpuaddr, sizedwords, hostaddr); -#endif - - while (dwords_left > 0) { - int count = 0; /* dword count including packet header */ - bool cur_ret = true; - - switch (*hostaddr >> 30) { - case 0x0: /* type-0 */ - count = (*hostaddr >> 16)+2; - break; - case 0x1: /* type-1 */ - count = 2; - break; - case 0x3: /* type-3 */ - count = ((*hostaddr >> 16) & 0x3fff) + 2; - cur_ret = kgsl_cffdump_handle_type3(dev_priv, - hostaddr, check_only); - break; - default: - pr_warn("kgsl: cffdump: parse-ib: unexpected type: " - "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", - *hostaddr >> 30, *hostaddr, hostaddr, - gpuaddr+4*(sizedwords-dwords_left)); - cur_ret = false; - count = dwords_left; - break; - } - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x" - " @ 0x%p[gb:0x%08x], level:%d\n", - sizedwords-dwords_left, sizedwords, *hostaddr, - hostaddr, gpuaddr+4*(sizedwords-dwords_left), - level); - - print_hex_dump(KERN_ERR, level == 1 ? 
"IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - ret = ret && cur_ret; - - /* jump to next packet */ - dwords_left -= count; - hostaddr += count; - cur_ret = dwords_left >= 0; - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, " - "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", - count, sizedwords-(dwords_left+count), - sizedwords, *(hostaddr-count), hostaddr-count, - gpuaddr+4*(sizedwords-(dwords_left+count)), - level); - - print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - - ret = ret && cur_ret; - } - - if (!ret) - pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, " - "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); - - if (!check_only) { -#ifdef DEBUG - uint offset = gpuaddr - memdesc->gpuaddr; - pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, " - "physaddr:%08x, offset:%d, size:%d", hoststart, - gpuaddr, memdesc->physaddr + offset, offset, - sizedwords*4); -#endif - kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4, - false); - } - - level--; - - return ret; -} - static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, uint prev_padding) { diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h old mode 100644 new mode 100755 index 64d369eb..ff78ae3f --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -124,6 +124,7 @@ struct kgsl_event { void (*func)(struct kgsl_device *, void *, u32); void *priv; struct list_head list; + struct kgsl_device_private *owner; }; @@ -152,7 +153,6 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; - struct list_head memqueue; unsigned int active_cnt; struct completion suspend_gate; @@ -185,6 +185,11 @@ struct kgsl_context { /* Pointer to the device specific context information */ void *devctxt; + /* + * Status indicating whether a gpu reset occurred and whether this + * context was responsible for causing it + */ + unsigned int reset_status; }; struct kgsl_process_private { @@ -194,15 +199,12 @@ struct kgsl_process_private { struct list_head mem_list; struct kgsl_pagetable *pagetable; struct list_head list; - struct kobject *kobj; + struct kobject kobj; struct { - unsigned int user; - unsigned int user_max; - unsigned int mapped; - unsigned int mapped_max; - unsigned int flushes; - } stats; + unsigned int cur; + unsigned int max; + } stats[KGSL_MEM_ENTRY_MAX]; }; struct kgsl_device_private { @@ -217,6 +219,14 @@ struct kgsl_power_stats { struct kgsl_device *kgsl_get_device(int dev_idx); +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, size_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + static inline void kgsl_regread(struct kgsl_device *device, unsigned int offsetwords, unsigned int *value) diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c old mode 100644 new mode 100755 index cdf9dc4e..66ac08f6 --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,7 +17,6 @@ #include "drmP.h" #include "drm.h" #include -#include #include "kgsl.h" #include "kgsl_device.h" @@ -39,6 +38,9 @@ #define ENTRY_EMPTY -1 #define ENTRY_NEEDS_CLEANUP -2 +#define DRM_KGSL_NOT_INITED -1 +#define DRM_KGSL_INITED 1 + #define DRM_KGSL_NUM_FENCE_ENTRIES (DRM_KGSL_HANDLE_WAIT_ENTRIES << 2) #define DRM_KGSL_HANDLE_WAIT_ENTRIES 5 @@ -127,6 +129,8 @@ struct drm_kgsl_gem_object { struct list_head wait_list; }; +static int kgsl_drm_inited = DRM_KGSL_NOT_INITED; + /* This is a global list of all the memory currently mapped in the MMU */ static struct list_head kgsl_mem_list; @@ -152,22 +156,6 @@ static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op) kgsl_cache_range_op(memdesc, cacheop); } -/* Flush all the memory mapped in the MMU */ - -void kgsl_gpu_mem_flush(int op) -{ - struct drm_kgsl_gem_object *entry; - - list_for_each_entry(entry, &kgsl_mem_list, list) { - kgsl_gem_mem_flush(&entry->memdesc, entry->type, op); - } - - /* Takes care of WT/WC case. - * More useful when we go barrierless - */ - dmb(); -} - /* TODO: * Add vsync wait */ @@ -186,41 +174,6 @@ struct kgsl_drm_device_priv { struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX]; }; -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param); - -static struct notifier_block kgsl_ts_nb[KGSL_DEVICE_MAX]; - -static int kgsl_drm_firstopen(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - - if (device == NULL) - continue; - - kgsl_ts_nb[i].notifier_call = kgsl_ts_notifier_cb; - kgsl_register_ts_notifier(device, &kgsl_ts_nb[i]); - } - - return 0; -} - -void kgsl_drm_lastclose(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - if (device == NULL) - continue; - - kgsl_unregister_ts_notifier(device, &kgsl_ts_nb[i]); - } -} - void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv) { } @@ -268,80 +221,71 @@ kgsl_gem_alloc_memory(struct drm_gem_object *obj) { struct drm_kgsl_gem_object *priv = obj->driver_private; int index; + int result = 0; /* Return if the memory is already allocated */ if (kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type)) return 0; + if (priv->pagetable == NULL) { + priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + if (priv->pagetable == NULL) { + DRM_ERROR("Unable to get the GPU MMU pagetable\n"); + return -EINVAL; + } + } + if (TYPE_IS_PMEM(priv->type)) { int type; if (priv->type == DRM_KGSL_GEM_TYPE_EBI || - priv->type & DRM_KGSL_GEM_PMEM_EBI) - type = PMEM_MEMTYPE_EBI1; - else - type = PMEM_MEMTYPE_SMI; - - priv->memdesc.physaddr = - pmem_kalloc(obj->size * priv->bufcount, - type | PMEM_ALIGNMENT_4K); - - if (IS_ERR((void *) priv->memdesc.physaddr)) { - DRM_ERROR("Unable to allocate PMEM memory\n"); - return -ENOMEM; + priv->type & DRM_KGSL_GEM_PMEM_EBI) { + type = PMEM_MEMTYPE_EBI1; + result = kgsl_sharedmem_ebimem_user( + &priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, + 0); + if (result) { + DRM_ERROR( + "Unable to allocate PMEM memory\n"); + return result; + } } - - priv->memdesc.size = obj->size * priv->bufcount; + else + return -EINVAL; } else if (TYPE_IS_MEM(priv->type)) { - priv->memdesc.hostptr = - vmalloc_user(obj->size * priv->bufcount); - if (priv->memdesc.hostptr 
== NULL) { - DRM_ERROR("Unable to allocate vmalloc memory\n"); - return -ENOMEM; + if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || + priv->type & DRM_KGSL_GEM_CACHE_MASK) + list_add(&priv->list, &kgsl_mem_list); + + result = kgsl_sharedmem_page_alloc_user(&priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, 0); + + if (result != 0) { + DRM_ERROR( + "Unable to allocate Vmalloc user memory\n"); + return result; } - - priv->memdesc.size = obj->size * priv->bufcount; - priv->memdesc.ops = &kgsl_vmalloc_ops; } else return -EINVAL; - for (index = 0; index < priv->bufcount; index++) + for (index = 0; index < priv->bufcount; index++) { priv->bufs[index].offset = index * obj->size; - + priv->bufs[index].gpuaddr = + priv->memdesc.gpuaddr + + priv->bufs[index].offset; + } + priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - - if (!priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return; - - kgsl_mmu_unmap(priv->pagetable, &priv->memdesc); - - kgsl_mmu_putpagetable(priv->pagetable); - priv->pagetable = NULL; - - if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || - (priv->type & DRM_KGSL_GEM_CACHE_MASK)) - list_del(&priv->list); - - priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; -} -#else -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ -} -#endif - static void kgsl_gem_free_memory(struct drm_gem_object *obj) { @@ -353,12 +297,17 @@ kgsl_gem_free_memory(struct drm_gem_object *obj) kgsl_gem_mem_flush(&priv->memdesc, priv->type, DRM_KGSL_GEM_CACHE_OP_FROM_DEV); - kgsl_gem_unmap(obj); - - if (TYPE_IS_PMEM(priv->type)) - pmem_kfree(priv->memdesc.physaddr); - kgsl_sharedmem_free(&priv->memdesc); + + kgsl_mmu_putpagetable(priv->pagetable); + priv->pagetable = NULL; + + if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || + (priv->type & DRM_KGSL_GEM_CACHE_MASK)) + list_del(&priv->list); + + priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; + } int @@ -454,7 +403,7 @@ kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, filp = fget(drm_fd); if (unlikely(filp == NULL)) { - DRM_ERROR("Unable to ghet the DRM file descriptor\n"); + DRM_ERROR("Unable to get the DRM file descriptor\n"); return -EINVAL; } file_priv = filp->private_data; @@ -527,7 +476,7 @@ kgsl_gem_init_obj(struct drm_device *dev, ret = drm_gem_handle_create(file_priv, obj, handle); - drm_gem_object_handle_unreference(obj); + drm_gem_object_unreference(obj); INIT_LIST_HEAD(&priv->wait_list); for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) { @@ -702,128 +651,14 @@ int kgsl_gem_unbind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (--priv->bound == 0) - kgsl_gem_unmap(obj); - - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - int ret = -EINVAL; - - if (priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return 0; - - /* Get the global page table */ - - if (priv->pagetable == NULL) { - priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); - - if 
(priv->pagetable == NULL) { - DRM_ERROR("Unable to get the GPU MMU pagetable\n"); - return -EINVAL; - } - } - - priv->memdesc.pagetable = priv->pagetable; - - ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - - if (!ret) { - for (index = 0; index < priv->bufcount; index++) { - priv->bufs[index].gpuaddr = - priv->memdesc.gpuaddr + - priv->bufs[index].offset; - } - } - - /* Add cached memory to the list to be cached */ - - if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || - priv->type & DRM_KGSL_GEM_CACHE_MASK) - list_add(&priv->list, &kgsl_mem_list); - - priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; - - return ret; -} -#else -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - - if (TYPE_IS_PMEM(priv->type)) { - for (index = 0; index < priv->bufcount; index++) - priv->bufs[index].gpuaddr = - priv->memdesc.physaddr + priv->bufs[index].offset; - - return 0; - } - - return -EINVAL; -} -#endif - int kgsl_gem_bind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - int ret = 0; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (priv->bound++ == 0) { - - if (!kgsl_gem_memory_allocated(obj)) { - DRM_ERROR("Memory not allocated for this object\n"); - ret = -ENOMEM; - goto out; - } - - ret = kgsl_gem_map(obj); - - /* This is legacy behavior - use GET_BUFFERINFO instead */ - args->gpuptr = priv->bufs[0].gpuaddr; - } -out: - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; + return 0; } /* Allocate the memory and prepare it for CPU mapping */ @@ -1068,17 +903,18 @@ int kgsl_gem_kmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; struct drm_kgsl_gem_object *priv; - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; mutex_lock(&dev->struct_mutex); priv = obj->driver_private; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) priv->memdesc.hostptr + offset; + i = offset >> PAGE_SHIFT; + page = sg_page(&(priv->memdesc.sg[i])); - page = vmalloc_to_page((void *) pg); if (!page) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; @@ -1370,27 +1206,6 @@ wakeup_fence_entries(struct drm_kgsl_gem_object_fence *fence) fence->fence_id = ENTRY_NEEDS_CLEANUP; /* Mark it as needing cleanup */ } -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param) -{ - struct drm_kgsl_gem_object_fence *fence; - struct kgsl_device *device = kgsl_get_device(code); - int i; - - /* loop through the fences to see what things can be processed */ - - for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) { - fence = &gem_buf_fence[i]; - if (!fence->ts_valid || fence->ts_device != code) - continue; - - if (kgsl_check_timestamp(device, fence->timestamp)) - wakeup_fence_entries(fence); - } - - return 0; -} - int kgsl_gem_lock_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -1583,7 +1398,7 @@ kgsl_gem_unlock_on_ts_ioctl(struct drm_device *dev, void *data, } device = kgsl_get_device(ts_device); - ts_done = kgsl_check_timestamp(device, args->timestamp); + ts_done = 
kgsl_check_timestamp(device, NULL, args->timestamp); mutex_lock(&dev->struct_mutex); @@ -1634,11 +1449,9 @@ struct drm_ioctl_desc kgsl_drm_ioctls[] = { }; static struct drm_driver driver = { - .driver_features = DRIVER_USE_PLATFORM_DEVICE | DRIVER_GEM, + .driver_features = DRIVER_GEM, .load = kgsl_drm_load, .unload = kgsl_drm_unload, - .firstopen = kgsl_drm_firstopen, - .lastclose = kgsl_drm_lastclose, .preclose = kgsl_drm_preclose, .suspend = kgsl_drm_suspend, .resume = kgsl_drm_resume, @@ -1669,8 +1482,13 @@ int kgsl_drm_init(struct platform_device *dev) { int i; + /* Only initialize once */ + if (kgsl_drm_inited == DRM_KGSL_INITED) + return 0; + + kgsl_drm_inited = DRM_KGSL_INITED; + driver.num_ioctls = DRM_ARRAY_SIZE(kgsl_drm_ioctls); - driver.platform_device = dev; INIT_LIST_HEAD(&kgsl_mem_list); @@ -1680,10 +1498,11 @@ int kgsl_drm_init(struct platform_device *dev) gem_buf_fence[i].fence_id = ENTRY_EMPTY; } - return drm_init(&driver); + return drm_platform_init(&driver, dev); } void kgsl_drm_exit(void) { - drm_exit(&driver); + kgsl_drm_inited = DRM_KGSL_NOT_INITED; + drm_platform_exit(&driver, driver.kdriver.platform_device); } diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c old mode 100644 new mode 100755 index 9e7ef61d..5d326658 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -356,8 +356,8 @@ err_ptpool_remove: int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct kgsl_gpummu_pt *gpummu_pt = pt->priv; - return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); + struct kgsl_gpummu_pt *gpummu_pt = pt ? pt->priv : NULL; + return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); } void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) @@ -385,14 +385,16 @@ kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - writel_relaxed(val, &baseptr[pte]); + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + baseptr[pte] = val; } static inline uint32_t kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + return baseptr[pte] & GSL_PT_PAGE_ADDR_MASK; } static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, @@ -683,7 +685,7 @@ kgsl_gpummu_map(void *mmu_specific_pt, flushtlb = 1; for_each_sg(memdesc->sg, s, memdesc->sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); unsigned int j; /* Each sg entry might be multiple pages long */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c old mode 100644 new mode 100755 index 30365a3c..760cdb03 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -34,8 +34,8 @@ struct kgsl_iommu { static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct iommu_domain *domain = pt->priv; - return pt && pt_base && ((unsigned int)domain == pt_base); + struct iommu_domain *domain = pt ? pt->priv : NULL; + return domain && pt_base && ((unsigned int)domain == pt_base); } static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c old mode 100644 new mode 100755 index 7916ecb0..d7585ef9 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. 
- * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -23,6 +22,7 @@ #include "kgsl_mmu.h" #include "kgsl_device.h" #include "kgsl_sharedmem.h" +#include "adreno_postmortem.h" #define KGSL_MMU_ALIGN_SHIFT 13 #define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) @@ -592,6 +592,12 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, memdesc->size); + /* + * Don't clear the gpuaddr on global mappings because they + * may be in use by other pagetables + */ + if (!(memdesc->priv & KGSL_MEMFLAGS_GLOBAL)) + memdesc->gpuaddr = 0; return 0; } EXPORT_SYMBOL(kgsl_mmu_unmap); @@ -623,6 +629,7 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, gpuaddr, memdesc->gpuaddr); goto error_unmap; } + memdesc->priv |= KGSL_MEMFLAGS_GLOBAL; return result; error_unmap: kgsl_mmu_unmap(pagetable, memdesc); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c old mode 100644 new mode 100755 index a587c44a..b52fc1d5 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,9 +10,14 @@ * GNU General Public License for more details. * */ + +#include #include #include #include +#include +#include +#include #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -21,6 +25,59 @@ #include "kgsl_device.h" #include "adreno_ringbuffer.h" +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. 
This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; + +/** + * Given a kobj, find the process structure attached to it + */ + static struct kgsl_process_private * _get_priv_from_kobj(struct kobject *kobj) { @@ -41,87 +98,109 @@ _get_priv_from_kobj(struct kobject *kobj) return NULL; } -/* sharedmem / memory sysfs files */ +/** + * Show the current amount of memory allocated for the given memtype + */ static ssize_t -process_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) { + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].max); +} + + +static void mem_entry_sysfs_release(struct kobject *kobj) +{ +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); struct kgsl_process_private *priv; - unsigned int val = 0; + ssize_t ret; mutex_lock(&kgsl_driver.process_mutex); priv = _get_priv_from_kobj(kobj); - if (priv == NULL) { - mutex_unlock(&kgsl_driver.process_mutex); - return 0; - } - - if (!strncmp(attr->attr.name, "user", 4)) - val = priv->stats.user; - if (!strncmp(attr->attr.name, "user_max", 8)) - val = priv->stats.user_max; - if (!strncmp(attr->attr.name, "mapped", 6)) - val = priv->stats.mapped; - if (!strncmp(attr->attr.name, "mapped_max", 10)) - val = priv->stats.mapped_max; - if (!strncmp(attr->attr.name, "flushes", 7)) - val = priv->stats.flushes; + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; mutex_unlock(&kgsl_driver.process_mutex); - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return ret; } -#define KGSL_MEMSTAT_ATTR(_name, _show) \ - static struct kobj_attribute attr_##_name = \ - __ATTR(_name, 0444, _show, NULL) - -KGSL_MEMSTAT_ATTR(user, process_show); -KGSL_MEMSTAT_ATTR(user_max, process_show); -KGSL_MEMSTAT_ATTR(mapped, process_show); -KGSL_MEMSTAT_ATTR(mapped_max, process_show); -KGSL_MEMSTAT_ATTR(flushes, process_show); - -static struct attribute *process_attrs[] = { - &attr_user.attr, - &attr_user_max.attr, - &attr_mapped.attr, - &attr_mapped_max.attr, - &attr_flushes.attr, - NULL +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, }; -static struct attribute_group process_attr_group = { - .attrs = process_attrs, +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, + .default_attrs = NULL, + .release = mem_entry_sysfs_release +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), +#ifdef CONFIG_ANDROID_PMEM + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem), +#endif +#ifdef CONFIG_ASHMEM 
+ MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem), +#endif + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), +#endif }; void kgsl_process_uninit_sysfs(struct kgsl_process_private *private) { - /* Remove the sysfs entry */ - if (private->kobj) { - sysfs_remove_group(private->kobj, &process_attr_group); - kobject_put(private->kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); } + + kobject_put(&private->kobj); } void kgsl_process_init_sysfs(struct kgsl_process_private *private) { unsigned char name[16]; + int i, ret; - /* Add a entry to the sysfs device */ snprintf(name, sizeof(name), "%d", private->pid); - private->kobj = kobject_create_and_add(name, kgsl_driver.prockobj); - /* sysfs failure isn't fatal, just annoying */ - if (private->kobj != NULL) { - if (sysfs_create_group(private->kobj, &process_attr_group)) { - kobject_put(private->kobj); - private->kobj = NULL; - } + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) + return; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + /* We need to check the value of sysfs_create_file, but we + * don't really care if it passed or not */ + + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr); + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr); } } @@ -135,6 +214,10 @@ static int kgsl_drv_memstat_show(struct device *dev, val = kgsl_driver.stats.vmalloc; else if (!strncmp(attr->attr.name, "vmalloc_max", 11)) val = kgsl_driver.stats.vmalloc_max; + else if (!strncmp(attr->attr.name, "page_alloc", 10)) + val = kgsl_driver.stats.page_alloc; + else if (!strncmp(attr->attr.name, "page_alloc_max", 14)) + val = kgsl_driver.stats.page_alloc_max; else if (!strncmp(attr->attr.name, "coherent", 8)) val = kgsl_driver.stats.coherent; else if (!strncmp(attr->attr.name, "coherent_max", 12)) @@ -164,6 +247,8 @@ static int kgsl_drv_histogram_show(struct device *dev, DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); @@ -173,6 +258,8 @@ DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); static struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, &dev_attr_coherent, &dev_attr_coherent_max, &dev_attr_mapped, @@ -216,7 +303,7 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) int i; for_each_sg(sg, s, sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); _outer_cache_range_op(op, paddr, s->length); } } @@ -227,17 +314,18 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) } #endif -static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) memdesc->hostptr + 
offset; - page = vmalloc_to_page((void *) pg); + i = offset >> PAGE_SHIFT; + page = sg_page(&memdesc->sg[i]); if (page == NULL) return VM_FAULT_SIGBUS; @@ -247,15 +335,30 @@ static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, return 0; } -static int kgsl_vmalloc_vmflags(struct kgsl_memdesc *memdesc) +static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc) { return VM_RESERVED | VM_DONTEXPAND; } -static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) { + int i = 0; + struct scatterlist *sg; + int sglen = memdesc->sglen; + + /* Don't free the guard page if it was used */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + kgsl_driver.stats.page_alloc -= memdesc->size; + + if (memdesc->hostptr) { + vunmap(memdesc->hostptr); kgsl_driver.stats.vmalloc -= memdesc->size; - vfree(memdesc->hostptr); + } + if (memdesc->sg) + for_each_sg(memdesc->sg, sg, sglen, i) + __free_page(sg_page(sg)); } static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) @@ -263,6 +366,48 @@ static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; } +/* + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int sglen = memdesc->sglen; + int i; + + /* Don't map the guard page if it exists */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + /* create a list of pages to call vmap */ + pages = vmalloc(sglen * sizeof(struct page *)); + if (!pages) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct page *)); + return -ENOMEM; + } + for_each_sg(memdesc->sg, sg, sglen, i) + pages[i] = sg_page(sg); + memdesc->hostptr = vmap(pages, sglen, + VM_IOREMAP, page_prot); + KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, + kgsl_driver.stats.vmalloc_max); + vfree(pages); + } + if (!memdesc->hostptr) + return -ENOMEM; + + return 0; +} + static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -302,12 +447,13 @@ static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) } /* Global - also used by kgsl_drm.c */ -struct kgsl_memdesc_ops kgsl_vmalloc_ops = { - .free = kgsl_vmalloc_free, - .vmflags = kgsl_vmalloc_vmflags, - .vmfault = kgsl_vmalloc_vmfault, +struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = kgsl_page_alloc_vmflags, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel_mem = kgsl_page_alloc_map_kernel, }; -EXPORT_SYMBOL(kgsl_vmalloc_ops); +EXPORT_SYMBOL(kgsl_page_alloc_ops); static struct kgsl_memdesc_ops kgsl_ebimem_ops = { .free = kgsl_ebimem_free, @@ -341,47 +487,145 @@ void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) EXPORT_SYMBOL(kgsl_cache_range_op); static int -_kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, - void *ptr, size_t size, unsigned int protflags) + size_t size, unsigned int protflags) { - int order, ret = 0; + int i, order, ret = 0; int sglen = PAGE_ALIGN(size) / PAGE_SIZE; - int i; + struct page **pages = NULL; + pgprot_t 
page_prot = pgprot_writecombine(PAGE_KERNEL);
+	void *ptr;
+
+	/*
+	 * Add guard page to the end of the allocation when the
+	 * IOMMU is in use.
+	 */
+
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU)
+		sglen++;
 
 	memdesc->size = size;
 	memdesc->pagetable = pagetable;
 	memdesc->priv = KGSL_MEMFLAGS_CACHED;
-	memdesc->ops = &kgsl_vmalloc_ops;
-	memdesc->hostptr = (void *) ptr;
+	memdesc->ops = &kgsl_page_alloc_ops;
+
+	memdesc->sg = kgsl_sg_alloc(sglen);
-	memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL);
 	if (memdesc->sg == NULL) {
+		KGSL_CORE_ERR("vmalloc(%d) failed\n",
+			sglen * sizeof(struct scatterlist));
 		ret = -ENOMEM;
 		goto done;
 	}
 
+	/*
+	 * Allocate space to store the list of pages to send to vmap.
+	 * This is an array of pointers so we can track 1024 pages per page of
+	 * allocation which means we can handle up to an 8MB buffer request with
+	 * two pages; well within the acceptable limits for using kmalloc.
+	 */
+
+	pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL);
+
+	if (pages == NULL) {
+		KGSL_CORE_ERR("kmalloc (%d) failed\n",
+			sglen * sizeof(struct page *));
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	kmemleak_not_leak(memdesc->sg);
+
 	memdesc->sglen = sglen;
 	sg_init_table(memdesc->sg, sglen);
 
-	for (i = 0; i < memdesc->sglen; i++, ptr += PAGE_SIZE) {
-		struct page *page = vmalloc_to_page(ptr);
-		if (!page) {
-			ret = -EINVAL;
+	for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) {
+
+		/*
+		 * Don't use GFP_ZERO here because it is faster to memset the
+		 * range ourselves (see below)
+		 */
+
+		pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+		if (pages[i] == NULL) {
+			ret = -ENOMEM;
+			memdesc->sglen = i;
 			goto done;
 		}
-		sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0);
+
+		sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0);
 	}
 
-	kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV);
+	/* Add the guard page to the end of the sglist */
+
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) {
+		/*
+		 * It doesn't matter if we use GFP_ZERO here, this never
+		 * gets mapped, and we only allocate it once in the life
+		 * of the system
+		 */
+
+		if (kgsl_guard_page == NULL)
+			kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO |
+				__GFP_HIGHMEM);
+
+		if (kgsl_guard_page != NULL) {
+			sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page,
+				PAGE_SIZE, 0);
+			memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE;
+		} else
+			memdesc->sglen--;
+	}
+
+	/*
+	 * All memory that goes to the user has to be zeroed out before it gets
+	 * exposed to userspace. This means that the memory has to be mapped in
+	 * the kernel, zeroed (memset) and then unmapped. This also means that
+	 * the dcache has to be flushed to ensure coherency between the kernel
+	 * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped,
+	 * zeroed and unmapped each individual page, and then we had to turn
+	 * around and call flush_dcache_page() on that page to clear the caches.
+	 * This was killing us for performance. Instead, we found it is much
+	 * faster to allocate the pages without GFP_ZERO, map the entire range,
+	 * memset it, flush the range and then unmap - this results in a factor
+	 * of 4 improvement for speed for large buffers. There is a small
+	 * increase in speed for small buffers, but only on the order of a few
+	 * microseconds at best. The only downside is that there needs to be
+	 * enough temporary space in vmalloc to accommodate the map.
This + * shouldn't be a problem, but if it happens, fall back to a much slower + * path + */ + + ptr = vmap(pages, i, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, memdesc->size); + dmac_flush_range(ptr, ptr + memdesc->size); + vunmap(ptr); + } else { + int j; + + /* Very, very, very slow path */ + + for (j = 0; j < i; j++) { + ptr = kmap_atomic(pages[j],KM_BOUNCE_READ); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr,KM_BOUNCE_READ); + } + } + + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, + KGSL_CACHE_OP_FLUSH); ret = kgsl_mmu_map(pagetable, memdesc, protflags); if (ret) goto done; - KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, - kgsl_driver.stats.vmalloc_max); + KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc, + kgsl_driver.stats.page_alloc_max); order = get_order(size); @@ -389,6 +633,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, kgsl_driver.stats.histogram[order]++; done: + kfree(pages); + if (ret) kgsl_sharedmem_free(memdesc); @@ -396,51 +642,41 @@ done: } int -kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { - void *ptr; - + int ret = 0; BUG_ON(size == 0); size = ALIGN(size, PAGE_SIZE * 2); - ptr = vmalloc(size); - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc(%d) failed\n", size); - return -ENOMEM; - } - - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + if (!ret) + ret = kgsl_page_alloc_map_kernel(memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + return ret; } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc); int -kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags) { - void *ptr; unsigned int protflags; BUG_ON(size == 0); - ptr = vmalloc_user(size); - - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc_user(%d) failed: allocated=%d\n", - size, kgsl_driver.stats.vmalloc); - return -ENOMEM; - } protflags = GSL_PT_PAGE_RV; if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, protflags); } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size) @@ -488,7 +724,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, memdesc->sglen); memset(memdesc, 0, sizeof(*memdesc)); } @@ -570,13 +806,17 @@ kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, uint32_t *dst, unsigned int offsetbytes) { + uint32_t *src; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); - WARN_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - if (offsetbytes + sizeof(unsigned int) > memdesc->size) + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) return -ERANGE; - - *dst = readl_relaxed(memdesc->hostptr + offsetbytes); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; return 
0; } EXPORT_SYMBOL(kgsl_sharedmem_readl); @@ -586,12 +826,19 @@ kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, uint32_t src) { + uint32_t *dst; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); - BUG_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, - src, sizeof(uint)); - writel_relaxed(src, memdesc->hostptr + offsetbytes); + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, + src, sizeof(uint32_t)); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; return 0; } EXPORT_SYMBOL(kgsl_sharedmem_writel); @@ -603,9 +850,39 @@ kgsl_sharedmem_set(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); BUG_ON(offsetbytes + sizebytes > memdesc->size); - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, value, + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, value, sizebytes); memset(memdesc->hostptr + offsetbytes, value, sizebytes); return 0; } EXPORT_SYMBOL(kgsl_sharedmem_set); + +/* + * kgsl_sharedmem_map_vma - Map a user vma to physical memory + * + * @vma - The user vma to map + * @memdesc - The memory descriptor which contains information about the + * physical memory + * + * Return: 0 on success else error code + */ +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc) +{ + unsigned long addr = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + int ret, i = 0; + + if (!memdesc->sg || (size != memdesc->size) || + (memdesc->sglen != (size / PAGE_SIZE))) + return -EINVAL; + + for (; addr < vma->vm_end; addr += PAGE_SIZE, i++) { + ret = vm_insert_page(vma, addr, sg_page(&memdesc->sg[i])); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_map_vma); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h old mode 100644 new mode 100755 index 61bcf05b..49a2be24 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -16,6 +16,8 @@ #include #include +#include +#include /* * Convert a page to a physical address @@ -31,20 +33,15 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 +/** Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMFLAGS_GLOBAL 0x00000002 -struct kgsl_memdesc_ops { - int (*vmflags)(struct kgsl_memdesc *); - int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, - struct vm_fault *); - void (*free)(struct kgsl_memdesc *memdesc); -}; +extern struct kgsl_memdesc_ops kgsl_page_alloc_ops; -extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; - -int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size); -int kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags); @@ -80,19 +77,58 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +static inline unsigned int kgsl_get_sg_pa(struct scatterlist *sg) +{ + /* + * Try 
sg_dma_address first to support ion carveout + * regions which do not work with sg_phys(). + */ + unsigned int pa = sg_dma_address(sg); + if (pa == 0) + pa = sg_phys(sg); + return pa; +} + +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc); + +/* + * For relatively small sglists, it is preferable to use kzalloc + * rather than going down the vmalloc rat hole. If the size of + * the sglist is < PAGE_SIZE use kzalloc otherwise fallback to + * vmalloc + */ + +static inline void *kgsl_sg_alloc(unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + else + return vmalloc(sglen * sizeof(struct scatterlist)); +} + +static inline void kgsl_sg_free(void *ptr, unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); +} + static inline int memdesc_sg_phys(struct kgsl_memdesc *memdesc, unsigned int physaddr, unsigned int size) { - struct page *page = phys_to_page(physaddr); + memdesc->sg = kgsl_sg_alloc(1); - memdesc->sg = kmalloc(sizeof(struct scatterlist) * 1, GFP_KERNEL); - if (memdesc->sg == NULL) - return -ENOMEM; + kmemleak_not_leak(memdesc->sg); memdesc->sglen = 1; sg_init_table(memdesc->sg, 1); - sg_set_page(&memdesc->sg[0], page, size, 0); + memdesc->sg[0].length = size; + memdesc->sg[0].offset = 0; + memdesc->sg[0].dma_address = physaddr; return 0; } @@ -100,11 +136,7 @@ static inline int kgsl_allocate(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc(memdesc, pagetable, size); -#else - return kgsl_sharedmem_ebimem(memdesc, pagetable, size); -#endif + return kgsl_sharedmem_page_alloc(memdesc, pagetable, size); } static inline int @@ -112,21 +144,13 @@ kgsl_allocate_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int flags) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc_user(memdesc, pagetable, size, flags); -#else - return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size, flags); -#endif + return kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size, flags); } static inline int kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) { int ret = kgsl_sharedmem_alloc_coherent(memdesc, size); -#ifndef CONFIG_MSM_KGSL_MMU - if (!ret) - memdesc->gpuaddr = memdesc->physaddr; -#endif return ret; } diff --git a/include/linux/ion.h b/include/linux/ion.h new file mode 100755 index 00000000..4b7b8b7d --- /dev/null +++ b/include/linux/ion.h @@ -0,0 +1,548 @@ +/* + * include/linux/ion.h + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#ifndef _LINUX_ION_H
+#define _LINUX_ION_H
+
+#include
+#include
+
+
+struct ion_handle;
+/**
+ * enum ion_heap_type - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
+ *				 carveout heap, allocations are physically
+ *				 contiguous
+ * @ION_NUM_HEAPS:		 helper for iterating over heaps
+ */
+enum ion_heap_type {
+	ION_HEAP_TYPE_SYSTEM,
+	ION_HEAP_TYPE_SYSTEM_CONTIG,
+	ION_HEAP_TYPE_CARVEOUT,
+	ION_HEAP_TYPE_CUSTOM, /* must be last so device specific heaps always
+				 are at the end of this enum */
+	ION_NUM_HEAPS,
+};
+
+#define ION_HEAP_SYSTEM_MASK		(1 << ION_HEAP_TYPE_SYSTEM)
+#define ION_HEAP_SYSTEM_CONTIG_MASK	(1 << ION_HEAP_TYPE_SYSTEM_CONTIG)
+#define ION_HEAP_CARVEOUT_MASK		(1 << ION_HEAP_TYPE_CARVEOUT)
+
+
+/**
+ * These are the only ids that should be used for Ion heap ids.
+ * The ids listed are the order in which allocation will be attempted
+ * if specified. Don't swap the order of heap ids unless you know what
+ * you are doing!
+ */
+
+enum ion_heap_ids {
+	ION_HEAP_SYSTEM_ID,
+	ION_HEAP_SYSTEM_CONTIG_ID,
+	ION_HEAP_EBI_ID,
+	ION_HEAP_SMI_ID,
+	ION_HEAP_ADSP_ID,
+	ION_HEAP_AUDIO_ID,
+};
+
+#define ION_KMALLOC_HEAP_NAME	"kmalloc"
+#define ION_VMALLOC_HEAP_NAME	"vmalloc"
+#define ION_EBI1_HEAP_NAME	"EBI1"
+#define ION_ADSP_HEAP_NAME	"adsp"
+#define ION_SMI_HEAP_NAME	"smi"
+
+#define CACHED		1
+#define UNCACHED	0
+
+#define ION_CACHE_SHIFT	0
+
+#define ION_SET_CACHE(__cache)	((__cache) << ION_CACHE_SHIFT)
+
+#define ION_IS_CACHED(__flags)	((__flags) & (1 << ION_CACHE_SHIFT))
+
+#ifdef __KERNEL__
+#include
+#include
+struct ion_device;
+struct ion_heap;
+struct ion_mapper;
+struct ion_client;
+struct ion_buffer;
+
+/* This should be removed some day when phys_addr_t's are fully
+   plumbed in the kernel, and all instances of ion_phys_addr_t should
+   be converted to phys_addr_t. For the time being many kernel interfaces
+   do not accept phys_addr_t's that would have to */
+#define ion_phys_addr_t unsigned long
+
+/**
+ * struct ion_platform_heap - defines a heap in the given platform
+ * @type:	type of the heap from ion_heap_type enum
+ * @id:		unique identifier for heap. When allocating, heaps with
+ *		lower ids will be tried first.
+ * @name:	used for debug purposes
+ * @base:	base address of heap in physical memory if applicable
+ * @size:	size of the heap in bytes if applicable
+ * @memory_type: memory type (from enum ion_memory_types) backing the heap
+ *
+ * Provided by the board file.
+ */
+struct ion_platform_heap {
+	enum ion_heap_type type;
+	unsigned int id;
+	const char *name;
+	ion_phys_addr_t base;
+	size_t size;
+	enum ion_memory_types memory_type;
+};
+
+/**
+ * struct ion_platform_data - array of platform heaps passed from board file
+ * @nr:		number of structures in the array
+ * @heaps:	array of platform_heap structures
+ *
+ * Provided by the board file in the form of platform data to a platform device.
+ */
+struct ion_platform_data {
+	int nr;
+	struct ion_platform_heap heaps[];
+};
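As a usage sketch (not part of the patch): a board file would describe its heaps with the structures above and hand them to the Ion platform device. Everything below is illustrative - the base address, size, and choice of heaps are invented, and the static initialization of the flexible heaps[] array relies on a GCC extension commonly used for platform data.

static struct ion_platform_data example_ion_pdata = {
	.nr = 2,
	.heaps = {
		{
			/* system heap: no carveout, so no base/size */
			.type = ION_HEAP_TYPE_SYSTEM,
			.id   = ION_HEAP_SYSTEM_ID,
			.name = ION_VMALLOC_HEAP_NAME,
		},
		{
			/* carveout heap; address and size are made up */
			.type = ION_HEAP_TYPE_CARVEOUT,
			.id   = ION_HEAP_EBI_ID,
			.name = ION_EBI1_HEAP_NAME,
			.base = 0x40000000,
			.size = 8 * 1024 * 1024,
			.memory_type = ION_EBI_TYPE,	/* from mach/ion.h */
		},
	},
};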
+
+#ifdef CONFIG_ION
+
+/**
+ * ion_client_create() - allocates a client and returns it
+ * @dev:	the global ion device
+ * @heap_mask:	mask of heaps this client can allocate from
+ * @name:	used for debugging
+ */
+struct ion_client *ion_client_create(struct ion_device *dev,
+				     unsigned int heap_mask, const char *name);
+
+/**
+ * msm_ion_client_create - allocate a client using the ion_device specified in
+ * drivers/gpu/ion/msm/msm_ion.c
+ *
+ * heap_mask and name are the same as ion_client_create, return values
+ * are the same as ion_client_create.
+ */
+
+struct ion_client *msm_ion_client_create(unsigned int heap_mask,
+					 const char *name);
+
+/**
+ * ion_client_destroy() - frees a client and all its handles
+ * @client:	the client
+ *
+ * Free the provided client and all its resources including
+ * any handles it is holding.
+ */
+void ion_client_destroy(struct ion_client *client);
+
+/**
+ * ion_alloc - allocate ion memory
+ * @client:	the client
+ * @len:	size of the allocation
+ * @align:	requested allocation alignment, lots of hardware blocks have
+ *		alignment requirements of some kind
+ * @flags:	mask of heaps to allocate from, if multiple bits are set
+ *		heaps will be tried in order from lowest to highest order bit
+ *
+ * Allocate memory in one of the heaps provided in heap mask and return
+ * an opaque handle to it.
+ */
+struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
+			     size_t align, unsigned int flags);
+
+/**
+ * ion_free - free a handle
+ * @client:	the client
+ * @handle:	the handle to free
+ *
+ * Free the provided handle.
+ */
+void ion_free(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_phys - returns the physical address and len of a handle
+ * @client:	the client
+ * @handle:	the handle
+ * @addr:	a pointer to put the address in
+ * @len:	a pointer to put the length in
+ *
+ * This function queries the heap for a particular handle to get the
+ * handle's physical address. Its output is only correct if
+ * a heap returns physically contiguous memory -- in other cases
+ * this API should not be implemented -- ion_map_dma should be used
+ * instead. Returns -EINVAL if the handle is invalid. This has
+ * no implications on the reference counting of the handle --
+ * the returned value may not be valid if the caller is not
+ * holding a reference.
+ */
+int ion_phys(struct ion_client *client, struct ion_handle *handle,
+	     ion_phys_addr_t *addr, size_t *len);
+
+/**
+ * ion_map_kernel - create mapping for the given handle
+ * @client:	the client
+ * @handle:	handle to map
+ * @flags:	flags for this mapping
+ *
+ * Map the given handle into the kernel and return a kernel address that
+ * can be used to access this address. If no flags are specified, this
+ * will return a non-secure uncached mapping.
+ */
+void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle,
+		     unsigned long flags);
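A minimal kernel-side sketch of the client API declared above, assuming CONFIG_ION is set: create a client, allocate, optionally map into the kernel, then clean up. The heap mask, client name, and buffer size are arbitrary example values, and error handling is abbreviated.

static int example_ion_kernel_use(void)
{
	struct ion_client *client;
	struct ion_handle *handle;
	void *vaddr;

	client = msm_ion_client_create(ION_HEAP_CARVEOUT_MASK, "example");
	if (IS_ERR(client))
		return PTR_ERR(client);

	/* 4K buffer, 4K aligned, from the heaps enabled in the mask */
	handle = ion_alloc(client, 4096, 4096, ION_HEAP_CARVEOUT_MASK);
	if (IS_ERR(handle)) {
		ion_client_destroy(client);
		return PTR_ERR(handle);
	}

	/* CPU access via a kernel mapping (ion_unmap_kernel is declared below) */
	vaddr = ion_map_kernel(client, handle, ION_SET_CACHE(UNCACHED));
	if (!IS_ERR(vaddr)) {
		memset(vaddr, 0, 4096);
		ion_unmap_kernel(client, handle);
	}

	ion_free(client, handle);
	ion_client_destroy(client);
	return 0;
}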
+
+/**
+ * ion_unmap_kernel() - destroy a kernel mapping for a handle
+ * @client:	the client
+ * @handle:	handle to unmap
+ */
+void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_map_dma - create a dma mapping for a given handle
+ * @client:	the client
+ * @handle:	handle to map
+ *
+ * Return an sglist describing the given handle
+ */
+struct scatterlist *ion_map_dma(struct ion_client *client,
+				struct ion_handle *handle,
+				unsigned long flags);
+
+/**
+ * ion_unmap_dma() - destroy a dma mapping for a handle
+ * @client:	the client
+ * @handle:	handle to unmap
+ */
+void ion_unmap_dma(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_share() - given a handle, obtain a buffer to pass to other clients
+ * @client:	the client
+ * @handle:	the handle to share
+ *
+ * Given a handle, return a buffer, which exists in a global name
+ * space, and can be passed to other clients. Should be passed into ion_import
+ * to obtain a new handle for this buffer.
+ *
+ * NOTE: This function does not take an extra reference. The burden is on the
+ * caller to make sure the buffer doesn't go away while it's being passed to
+ * another client. That is, ion_free should not be called on this handle until
+ * the buffer has been imported into the other client.
+ */
+struct ion_buffer *ion_share(struct ion_client *client,
+			     struct ion_handle *handle);
+
+/**
+ * ion_import() - given a buffer in another client, import it
+ * @client:	the client to import the buffer into
+ * @buffer:	the buffer to import (as obtained from ion_share)
+ *
+ * Given a buffer, add it to the client and return the handle to use to refer
+ * to it further. This is called to share a handle from one kernel client to
+ * another.
+ */
+struct ion_handle *ion_import(struct ion_client *client,
+			      struct ion_buffer *buffer);
+
+/**
+ * ion_import_fd() - given an fd obtained via ION_IOC_SHARE ioctl, import it
+ * @client:	the client to import the buffer into
+ * @fd:		the fd
+ *
+ * A helper function for drivers that will be receiving ion buffers shared
+ * with them from userspace. These buffers are represented by a file
+ * descriptor obtained as the return from the ION_IOC_SHARE ioctl.
+ * This function converts that fd into the underlying buffer, and returns
+ * the handle to use to refer to it further.
+ */
+struct ion_handle *ion_import_fd(struct ion_client *client, int fd);
+
+/**
+ * ion_handle_get_flags - get the flags for a given handle
+ *
+ * @client - client who allocated the handle
+ * @handle - handle to get the flags
+ * @flags - pointer to store the flags
+ *
+ * Gets the current flags for a handle. These flags indicate various options
+ * of the buffer (caching, security, etc.)
+ */
+int ion_handle_get_flags(struct ion_client *client, struct ion_handle *handle,
+			 unsigned long *flags);
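A short sketch of the kernel-to-kernel sharing path described above. Both clients and the source handle are assumed to already exist; per the NOTE on ion_share(), the source handle must not be freed until the import has completed.

static struct ion_handle *example_ion_pass(struct ion_client *src,
					   struct ion_handle *handle,
					   struct ion_client *dst)
{
	/* the buffer lives in a global namespace and carries no extra ref */
	struct ion_buffer *buffer = ion_share(src, handle);

	/* give dst its own handle to the same buffer */
	return ion_import(dst, buffer);
}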
+
+#else
+static inline struct ion_client *ion_client_create(struct ion_device *dev,
+				unsigned int heap_mask, const char *name)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline struct ion_client *msm_ion_client_create(unsigned int heap_mask,
+				const char *name)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void ion_client_destroy(struct ion_client *client) { }
+
+static inline struct ion_handle *ion_alloc(struct ion_client *client,
+				size_t len, size_t align, unsigned int flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void ion_free(struct ion_client *client,
+	struct ion_handle *handle) { }
+
+
+static inline int ion_phys(struct ion_client *client,
+	struct ion_handle *handle, ion_phys_addr_t *addr, size_t *len)
+{
+	return -ENODEV;
+}
+
+static inline void *ion_map_kernel(struct ion_client *client,
+	struct ion_handle *handle, unsigned long flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void ion_unmap_kernel(struct ion_client *client,
+	struct ion_handle *handle) { }
+
+static inline struct scatterlist *ion_map_dma(struct ion_client *client,
+	struct ion_handle *handle, unsigned long flags)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void ion_unmap_dma(struct ion_client *client,
+	struct ion_handle *handle) { }
+
+static inline struct ion_buffer *ion_share(struct ion_client *client,
+	struct ion_handle *handle)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline struct ion_handle *ion_import(struct ion_client *client,
+	struct ion_buffer *buffer)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline struct ion_handle *ion_import_fd(struct ion_client *client,
+	int fd)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline int ion_handle_get_flags(struct ion_client *client,
+	struct ion_handle *handle, unsigned long *flags)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_ION */
+#endif /* __KERNEL__ */
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations handled via the following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len:	size of the allocation
+ * @align:	required alignment of the allocation
+ * @flags:	flags passed to heap
+ * @handle:	pointer that will be populated with a cookie to use to refer
+ *		to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+	size_t len;
+	size_t align;
+	unsigned int flags;
+	struct ion_handle *handle;
+};
+
+/**
+ * struct ion_fd_data - metadata passed to/from userspace for a handle/fd pair
+ * @handle:	a handle
+ * @fd:		a file descriptor representing that handle
+ *
+ * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with
+ * the handle returned from ION_IOC_ALLOC, and the kernel returns the file
+ * descriptor to share or map in the fd field. For ION_IOC_IMPORT, userspace
+ * provides the file descriptor and the kernel returns the handle.
+ */
+struct ion_fd_data {
+	struct ion_handle *handle;
+	int fd;
+};
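A userspace sketch of how these structures are used (the ION_IOC_* numbers are defined just below): allocate a buffer, exchange the opaque handle for an fd with ION_IOC_MAP, and mmap() it. The alignment and heap flags are example values; error paths are abbreviated and would leak the handle and fd in real code.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static void *example_ion_user_alloc(size_t len)
{
	struct ion_allocation_data alloc = {
		.len = len,
		.align = 4096,
		.flags = ION_HEAP_SYSTEM_MASK,
	};
	struct ion_fd_data map;
	int ionfd = open("/dev/ion", O_RDWR);

	if (ionfd < 0)
		return MAP_FAILED;
	if (ioctl(ionfd, ION_IOC_ALLOC, &alloc) < 0)
		return MAP_FAILED;

	/* turn the opaque handle into an fd we can mmap */
	map.handle = alloc.handle;
	if (ioctl(ionfd, ION_IOC_MAP, &map) < 0)
		return MAP_FAILED;

	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		    map.fd, 0);
}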
+
+/**
+ * struct ion_handle_data - a handle passed to/from the kernel
+ * @handle:	a handle
+ */
+struct ion_handle_data {
+	struct ion_handle *handle;
+};
+
+/**
+ * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl
+ * @cmd:	the custom ioctl function to call
+ * @arg:	additional data to pass to the custom ioctl, typically a user
+ *		pointer to a predefined structure
+ *
+ * This works just like the regular cmd and arg fields of an ioctl.
+ */
+struct ion_custom_data {
+	unsigned int cmd;
+	unsigned long arg;
+};
+
+
+/* struct ion_flush_data - data passed to ion for flushing caches
+ *
+ * @handle:	handle with data to flush
+ * @vaddr:	userspace virtual address mapped with mmap
+ * @offset:	offset into the handle to flush
+ * @length:	length of handle to flush
+ *
+ * Performs cache operations on the handle. If p is the start address
+ * of the handle, p + offset through p + offset + length will have
+ * the cache operations performed.
+ */
+struct ion_flush_data {
+	struct ion_handle *handle;
+	void *vaddr;
+	unsigned int offset;
+	unsigned int length;
+};
+
+/* struct ion_flag_data - information about flags for this buffer
+ *
+ * @handle:	handle to get flags from
+ * @flags:	flags of this handle
+ *
+ * Takes handle as an input and outputs the flags from the handle
+ * in the flag field.
+ */
+struct ion_flag_data {
+	struct ion_handle *handle;
+	unsigned long flags;
+};
+
+#define ION_IOC_MAGIC		'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the handle field
+ * populated with the opaque handle for the allocation.
+ */
+#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
+				      struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_FREE - free memory
+ *
+ * Takes an ion_handle_data struct and frees the handle.
+ */
+#define ION_IOC_FREE		_IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data)
+
+/**
+ * DOC: ION_IOC_MAP - get a file descriptor to mmap
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle. Returns the struct with the fd field set to a file
+ * descriptor open in the current address space. This file descriptor
+ * can then be used as an argument to mmap.
+ */
+#define ION_IOC_MAP		_IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle. Returns the struct with the fd field set to a file
+ * descriptor open in the current address space. This file descriptor
+ * can then be passed to another process. The corresponding opaque handle can
+ * be retrieved via ION_IOC_IMPORT.
+ */
+#define ION_IOC_SHARE		_IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_IMPORT - imports a shared file descriptor
+ *
+ * Takes an ion_fd_data struct with the fd field populated with a valid file
+ * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle
+ * field set to the corresponding opaque handle.
+ */
+#define ION_IOC_IMPORT		_IOWR(ION_IOC_MAGIC, 5, int)
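A companion userspace sketch for the share/import pair above: process A converts its handle to an fd with ION_IOC_SHARE and passes it to process B (typically over a Unix socket with SCM_RIGHTS, elided here); B then converts the received fd back into a handle with ION_IOC_IMPORT on its own /dev/ion fd. Function names are invented for the example.

#include <sys/ioctl.h>

/* process A: handle -> shareable fd */
static int example_ion_share(int ionfd, struct ion_handle *handle)
{
	struct ion_fd_data data = { .handle = handle };

	if (ioctl(ionfd, ION_IOC_SHARE, &data) < 0)
		return -1;
	return data.fd;		/* send this fd to process B */
}

/* process B: received fd -> handle */
static struct ion_handle *example_ion_import(int ionfd, int fd)
{
	struct ion_fd_data data = { .fd = fd };

	if (ioctl(ionfd, ION_IOC_IMPORT, &data) < 0)
		return NULL;
	return data.handle;
}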
+
+/**
+ * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
+ *
+ * Takes the argument of the architecture specific ioctl to call and
+ * passes appropriate userdata for that ioctl
+ */
+#define ION_IOC_CUSTOM		_IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data)
+
+
+/**
+ * DOC: ION_IOC_CLEAN_CACHES - clean the caches
+ *
+ * Clean the caches of the handle specified.
+ */
+#define ION_IOC_CLEAN_CACHES	_IOWR(ION_IOC_MAGIC, 7, \
+				      struct ion_flush_data)
+/**
+ * DOC: ION_IOC_INV_CACHES - invalidate the caches
+ *
+ * Invalidate the caches of the handle specified.
+ */
+#define ION_IOC_INV_CACHES	_IOWR(ION_IOC_MAGIC, 8, \
+				      struct ion_flush_data)
+/**
+ * DOC: ION_IOC_CLEAN_INV_CACHES - clean and invalidate the caches
+ *
+ * Clean and invalidate the caches of the handle specified.
+ */
+#define ION_IOC_CLEAN_INV_CACHES	_IOWR(ION_IOC_MAGIC, 9, \
+					      struct ion_flush_data)
+
+/**
+ * DOC: ION_IOC_GET_FLAGS - get the flags of the handle
+ *
+ * Gets the flags of the current handle which indicate cacheability,
+ * secure state etc.
+ */
+#define ION_IOC_GET_FLAGS	_IOWR(ION_IOC_MAGIC, 10, \
+				      struct ion_flag_data)
+#endif /* _LINUX_ION_H */
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
old mode 100644
new mode 100755
index 56e6cc6b..36357e08
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -42,6 +42,7 @@
 #define KGSL_CONTEXT_NO_GMEM_ALLOC 2
 #define KGSL_CONTEXT_SUBMIT_IB_LIST 4
 #define KGSL_CONTEXT_CTX_SWITCH 8
+#define KGSL_CONTEXT_PREAMBLE 16
 
 /* Memory allocation flags */
 #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000
@@ -58,6 +59,24 @@
 #define KGSL_FLAGS_RESERVED2 0x00000080
 #define KGSL_FLAGS_SOFT_RESET 0x00000100
 
+/* Clock flags to show which clocks should be controlled by a given platform */
+#define KGSL_CLK_SRC		0x00000001
+#define KGSL_CLK_CORE		0x00000002
+#define KGSL_CLK_IFACE		0x00000004
+#define KGSL_CLK_MEM		0x00000008
+#define KGSL_CLK_MEM_IFACE	0x00000010
+#define KGSL_CLK_AXI		0x00000020
+
+/*
+ * Reset status values for context
+ */
+enum kgsl_ctx_reset_stat {
+	KGSL_CTX_STAT_NO_ERROR				= 0x00000000,
+	KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT		= 0x00000001,
+	KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT	= 0x00000002,
+	KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT		= 0x00000003
+};
+
 #define KGSL_MAX_PWRLEVELS 5
 
 #define KGSL_CONVERT_TO_MBPS(val) \
@@ -74,7 +93,9 @@ enum kgsl_deviceid {
 enum kgsl_user_mem_type {
 	KGSL_USER_MEM_TYPE_PMEM = 0x00000000,
 	KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001,
-	KGSL_USER_MEM_TYPE_ADDR = 0x00000002
+	KGSL_USER_MEM_TYPE_ADDR = 0x00000002,
+	KGSL_USER_MEM_TYPE_ION = 0x00000003,
+	KGSL_USER_MEM_TYPE_MAX = 0x00000004,
 };
 
 struct kgsl_devinfo {
@@ -132,6 +153,7 @@ enum kgsl_property_type {
 	KGSL_PROP_MMU_ENABLE	 = 0x00000006,
 	KGSL_PROP_INTERRUPT_WAITS = 0x00000007,
 	KGSL_PROP_VERSION	 = 0x00000008,
+	KGSL_PROP_GPU_RESET_STAT = 0x00000009
 };
 
 struct kgsl_shadowprop {