From daf542e57e00fb91488dfdfe43ba1e6d8d14b8ab Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Sat, 12 May 2012 16:56:17 +0530 Subject: [PATCH 001/155] Backport android and MSM parts from caf --- arch/arm/mach-msm/irq.c | 170 +- arch/arm/mach-msm/proc_comm.c | 100 +- arch/arm/mach-msm/proc_comm.h | 17 +- arch/arm/mach-msm/sirc.c | 8 +- arch/arm/mach-msm/sirc.h | 12 +- arch/arm/mach-msm/smd.c | 12 +- arch/arm/mach-msm/smd_private.h | 93 +- drivers/misc/pmem.c | 1738 ++++----------------- drivers/staging/android/binder.c | 9 +- drivers/staging/android/logger.c | 4 + drivers/staging/android/lowmemorykiller.c | 68 +- drivers/staging/android/timed_gpio.c | 1 + include/linux/android_alarm.h | 1 + include/linux/android_pmem.h | 107 +- include/linux/gfp.h | 20 +- include/linux/msm_kgsl.h | 353 ----- mm/ashmem.c | 159 +- 17 files changed, 664 insertions(+), 2208 deletions(-) diff --git a/arch/arm/mach-msm/irq.c b/arch/arm/mach-msm/irq.c index 4b4a0442..096285e0 100644 --- a/arch/arm/mach-msm/irq.c +++ b/arch/arm/mach-msm/irq.c @@ -43,38 +43,19 @@ static int msm_irq_debug_mask; module_param_named(debug_mask, msm_irq_debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP); #define VIC_REG(off) (MSM_VIC_BASE + (off)) -#if defined(CONFIG_ARCH_MSM7X30) -#define VIC_INT_TO_REG_ADDR(base, irq) (base + (irq / 32) * 4) -#define VIC_INT_TO_REG_INDEX(irq) ((irq >> 5) & 3) -#else -#define VIC_INT_TO_REG_ADDR(base, irq) (base + ((irq & 32) ? 4 : 0)) -#define VIC_INT_TO_REG_INDEX(irq) ((irq >> 5) & 1) -#endif #define VIC_INT_SELECT0 VIC_REG(0x0000) /* 1: FIQ, 0: IRQ */ #define VIC_INT_SELECT1 VIC_REG(0x0004) /* 1: FIQ, 0: IRQ */ -#define VIC_INT_SELECT2 VIC_REG(0x0008) /* 1: FIQ, 0: IRQ */ -#define VIC_INT_SELECT3 VIC_REG(0x000C) /* 1: FIQ, 0: IRQ */ #define VIC_INT_EN0 VIC_REG(0x0010) #define VIC_INT_EN1 VIC_REG(0x0014) -#define VIC_INT_EN2 VIC_REG(0x0018) -#define VIC_INT_EN3 VIC_REG(0x001C) #define VIC_INT_ENCLEAR0 VIC_REG(0x0020) #define VIC_INT_ENCLEAR1 VIC_REG(0x0024) -#define VIC_INT_ENCLEAR2 VIC_REG(0x0028) -#define VIC_INT_ENCLEAR3 VIC_REG(0x002C) #define VIC_INT_ENSET0 VIC_REG(0x0030) #define VIC_INT_ENSET1 VIC_REG(0x0034) -#define VIC_INT_ENSET2 VIC_REG(0x0038) -#define VIC_INT_ENSET3 VIC_REG(0x003C) #define VIC_INT_TYPE0 VIC_REG(0x0040) /* 1: EDGE, 0: LEVEL */ #define VIC_INT_TYPE1 VIC_REG(0x0044) /* 1: EDGE, 0: LEVEL */ -#define VIC_INT_TYPE2 VIC_REG(0x0048) /* 1: EDGE, 0: LEVEL */ -#define VIC_INT_TYPE3 VIC_REG(0x004C) /* 1: EDGE, 0: LEVEL */ #define VIC_INT_POLARITY0 VIC_REG(0x0050) /* 1: NEG, 0: POS */ #define VIC_INT_POLARITY1 VIC_REG(0x0054) /* 1: NEG, 0: POS */ -#define VIC_INT_POLARITY2 VIC_REG(0x0058) /* 1: NEG, 0: POS */ -#define VIC_INT_POLARITY3 VIC_REG(0x005C) /* 1: NEG, 0: POS */ #define VIC_NO_PEND_VAL VIC_REG(0x0060) #if defined(CONFIG_ARCH_MSM_SCORPION) @@ -88,24 +69,14 @@ module_param_named(debug_mask, msm_irq_debug_mask, int, S_IRUGO | S_IWUSR | S_IW #endif #define VIC_IRQ_STATUS0 VIC_REG(0x0080) #define VIC_IRQ_STATUS1 VIC_REG(0x0084) -#define VIC_IRQ_STATUS2 VIC_REG(0x0088) -#define VIC_IRQ_STATUS3 VIC_REG(0x008C) #define VIC_FIQ_STATUS0 VIC_REG(0x0090) #define VIC_FIQ_STATUS1 VIC_REG(0x0094) -#define VIC_FIQ_STATUS2 VIC_REG(0x0098) -#define VIC_FIQ_STATUS3 VIC_REG(0x009C) #define VIC_RAW_STATUS0 VIC_REG(0x00A0) #define VIC_RAW_STATUS1 VIC_REG(0x00A4) -#define VIC_RAW_STATUS2 VIC_REG(0x00A8) -#define VIC_RAW_STATUS3 VIC_REG(0x00AC) #define VIC_INT_CLEAR0 VIC_REG(0x00B0) #define VIC_INT_CLEAR1 VIC_REG(0x00B4) -#define VIC_INT_CLEAR2 VIC_REG(0x00B8) -#define VIC_INT_CLEAR3 VIC_REG(0x00BC) 
#define VIC_SOFTINT0 VIC_REG(0x00C0) #define VIC_SOFTINT1 VIC_REG(0x00C4) -#define VIC_SOFTINT2 VIC_REG(0x00C8) -#define VIC_SOFTINT3 VIC_REG(0x00CC) #define VIC_IRQ_VEC_RD VIC_REG(0x00D0) /* pending int # */ #define VIC_IRQ_VEC_PEND_RD VIC_REG(0x00D4) /* pending vector addr */ #define VIC_IRQ_VEC_WR VIC_REG(0x00D8) @@ -129,40 +100,14 @@ module_param_named(debug_mask, msm_irq_debug_mask, int, S_IRUGO | S_IWUSR | S_IW #define VIC_VECTPRIORITY(n) VIC_REG(0x0200+((n) * 4)) #define VIC_VECTADDR(n) VIC_REG(0x0400+((n) * 4)) -#if defined(CONFIG_ARCH_MSM7X30) -#define VIC_NUM_REGS 4 -#else -#define VIC_NUM_REGS 2 -#endif - -#if VIC_NUM_REGS == 2 -#define DPRINT_REGS(base_reg, format, ...) \ - printk(KERN_INFO format " %x %x\n", ##__VA_ARGS__, \ - readl(base_reg ## 0), readl(base_reg ## 1)) -#define DPRINT_ARRAY(array, format, ...) \ - printk(KERN_INFO format " %x %x\n", ##__VA_ARGS__, \ - array[0], array[1]) -#elif VIC_NUM_REGS == 4 -#define DPRINT_REGS(base_reg, format, ...) \ - printk(KERN_INFO format " %x %x %x %x\n", ##__VA_ARGS__, \ - readl(base_reg ## 0), readl(base_reg ## 1), \ - readl(base_reg ## 2), readl(base_reg ## 3)) -#define DPRINT_ARRAY(array, format, ...) \ - printk(KERN_INFO format " %x %x %x %x\n", ##__VA_ARGS__, \ - array[0], array[1], \ - array[2], array[3]) -#else -#error "VIC_NUM_REGS set to illegal value" -#endif - static uint32_t msm_irq_smsm_wake_enable[2]; static struct { uint32_t int_en[2]; uint32_t int_type; uint32_t int_polarity; uint32_t int_select; -} msm_irq_shadow_reg[VIC_NUM_REGS]; -static uint32_t msm_irq_idle_disable[VIC_NUM_REGS]; +} msm_irq_shadow_reg[2]; +static uint32_t msm_irq_idle_disable[2]; #if defined(CONFIG_MSM_N_WAY_SMD) #define INT_INFO_SMSM_ID SMEM_APPS_DEM_SLAVE_DATA @@ -198,9 +143,7 @@ static uint8_t msm_irq_to_smsm[NR_MSM_IRQS + NR_SIRC_IRQS] = { [INT_UART1DM_IRQ] = 17, [INT_UART1DM_RX] = 18, [INT_KEYSENSE] = 19, -#if !defined(CONFIG_ARCH_MSM7X30) [INT_AD_HSSD] = 20, -#endif [INT_NAND_WR_ER_DONE] = 21, [INT_NAND_OP_DONE] = 22, @@ -226,31 +169,23 @@ static uint8_t msm_irq_to_smsm[NR_MSM_IRQS + NR_SIRC_IRQS] = { [INT_GP_TIMER_EXP] = SMSM_FAKE_IRQ, [INT_DEBUG_TIMER_EXP] = SMSM_FAKE_IRQ, [INT_ADSP_A11] = SMSM_FAKE_IRQ, -#ifdef CONFIG_ARCH_QSD8X50 +#ifdef CONFIG_ARCH_MSM_SCORPION [INT_SIRC_0] = SMSM_FAKE_IRQ, [INT_SIRC_1] = SMSM_FAKE_IRQ, #endif }; -static inline void msm_irq_write_all_regs(void __iomem *base, unsigned int val) -{ - int i; - /* the address must be continue */ - for (i = 0; i < VIC_NUM_REGS; i++) - writel(val, base + (i * 4)); -} - static void msm_irq_ack(unsigned int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_INT_CLEAR0, irq); + void __iomem *reg = VIC_INT_CLEAR0 + ((irq & 32) ? 4 : 0); irq = 1 << (irq & 31); writel(irq, reg); } static void msm_irq_mask(unsigned int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_INT_ENCLEAR0, irq); - unsigned index = VIC_INT_TO_REG_INDEX(irq); + void __iomem *reg = VIC_INT_ENCLEAR0 + ((irq & 32) ? 4 : 0); + unsigned index = (irq >> 5) & 1; uint32_t mask = 1UL << (irq & 31); int smsm_irq = msm_irq_to_smsm[irq]; @@ -266,8 +201,8 @@ static void msm_irq_mask(unsigned int irq) static void msm_irq_unmask(unsigned int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_INT_ENSET0, irq); - unsigned index = VIC_INT_TO_REG_INDEX(irq); + void __iomem *reg = VIC_INT_ENSET0 + ((irq & 32) ? 
4 : 0); + unsigned index = (irq >> 5) & 1; uint32_t mask = 1UL << (irq & 31); int smsm_irq = msm_irq_to_smsm[irq]; @@ -284,7 +219,7 @@ static void msm_irq_unmask(unsigned int irq) static int msm_irq_set_wake(unsigned int irq, unsigned int on) { - unsigned index = VIC_INT_TO_REG_INDEX(irq); + unsigned index = (irq >> 5) & 1; uint32_t mask = 1UL << (irq & 31); int smsm_irq = msm_irq_to_smsm[irq]; @@ -310,9 +245,9 @@ static int msm_irq_set_wake(unsigned int irq, unsigned int on) static int msm_irq_set_type(unsigned int irq, unsigned int flow_type) { - void __iomem *treg = VIC_INT_TO_REG_ADDR(VIC_INT_TYPE0, irq); - void __iomem *preg = VIC_INT_TO_REG_ADDR(VIC_INT_POLARITY0, irq); - unsigned index = VIC_INT_TO_REG_INDEX(irq); + void __iomem *treg = VIC_INT_TYPE0 + ((irq & 32) ? 4 : 0); + void __iomem *preg = VIC_INT_POLARITY0 + ((irq & 32) ? 4 : 0); + unsigned index = (irq >> 5) & 1; int b = 1 << (irq & 31); uint32_t polarity; uint32_t type; @@ -341,24 +276,16 @@ static int msm_irq_set_type(unsigned int irq, unsigned int flow_type) int msm_irq_pending(void) { - int i, pending = 0; - /* the address must be continue */ - for (i = 0; (i < VIC_NUM_REGS) && !pending; i++) - pending |= readl(VIC_IRQ_STATUS0 + (i * 4)); - - return pending; + return readl(VIC_IRQ_STATUS0) || readl(VIC_IRQ_STATUS1); } int msm_irq_idle_sleep_allowed(void) { - int i, disable = 0; - if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP_REQUEST) - DPRINT_ARRAY(msm_irq_idle_disable, - "msm_irq_idle_sleep_allowed: disable"); - for (i = 0; i < VIC_NUM_REGS; i++) - disable |= msm_irq_idle_disable[i]; - return !(disable || !smsm_int_info); + printk(KERN_INFO "msm_irq_idle_sleep_allowed: disable %x %x\n", + msm_irq_idle_disable[0], msm_irq_idle_disable[1]); + return !(msm_irq_idle_disable[0] || msm_irq_idle_disable[1] || + !smsm_int_info); } /* If arm9_wake is set: pass control to the other core. 
@@ -374,8 +301,8 @@ void msm_irq_enter_sleep1(bool arm9_wake, int from_idle) int msm_irq_enter_sleep2(bool arm9_wake, int from_idle) { - int i, limit = 10; - uint32_t pending[VIC_NUM_REGS]; + int limit = 10; + uint32_t pending0, pending1; if (from_idle && !arm9_wake) return 0; @@ -384,25 +311,23 @@ int msm_irq_enter_sleep2(bool arm9_wake, int from_idle) WARN_ON_ONCE(!arm9_wake && !from_idle); if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP) - DPRINT_REGS(VIC_IRQ_STATUS, "%s change irq, pend", __func__); - - for (i = 0; i < VIC_NUM_REGS; i++) { - pending[i] = readl(VIC_IRQ_STATUS0 + (i * 4)); - pending[i] &= msm_irq_shadow_reg[i].int_en[!from_idle]; - } + printk(KERN_INFO "msm_irq_enter_sleep change irq, pend %x %x\n", + readl(VIC_IRQ_STATUS0), readl(VIC_IRQ_STATUS1)); + pending0 = readl(VIC_IRQ_STATUS0); + pending1 = readl(VIC_IRQ_STATUS1); + pending0 &= msm_irq_shadow_reg[0].int_en[!from_idle]; /* Clear INT_A9_M2A_5 since requesting sleep triggers it */ - pending[0] &= ~(1U << INT_A9_M2A_5); - - for (i = 0; i < VIC_NUM_REGS; i++) { - if (pending[i]) { - if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP_ABORT) - DPRINT_ARRAY(pending, "%s abort", - __func__); - return -EAGAIN; - } + pending0 &= ~(1U << INT_A9_M2A_5); + pending1 &= msm_irq_shadow_reg[1].int_en[!from_idle]; + if (pending0 || pending1) { + if (msm_irq_debug_mask & IRQ_DEBUG_SLEEP_ABORT) + printk(KERN_INFO "msm_irq_enter_sleep2 abort %x %x\n", + pending0, pending1); + return -EAGAIN; } - msm_irq_write_all_regs(VIC_INT_EN0, 0); + writel(0, VIC_INT_EN0); + writel(0, VIC_INT_EN1); while (limit-- > 0) { int pend_irq; @@ -420,9 +345,8 @@ int msm_irq_enter_sleep2(bool arm9_wake, int from_idle) msm_irq_ack(INT_A9_M2A_6); writel(1U << INT_A9_M2A_6, VIC_INT_ENSET0); } else { - for (i = 0; i < VIC_NUM_REGS; i++) - writel(msm_irq_shadow_reg[i].int_en[1], - VIC_INT_ENSET0 + (i * 4)); + writel(msm_irq_shadow_reg[0].int_en[1], VIC_INT_ENSET0); + writel(msm_irq_shadow_reg[1].int_en[1], VIC_INT_ENSET1); } return 0; } @@ -433,7 +357,7 @@ void msm_irq_exit_sleep1(void) msm_irq_ack(INT_A9_M2A_6); msm_irq_ack(INT_PWB_I2C); - for (i = 0; i < VIC_NUM_REGS; i++) { + for (i = 0; i < 2; i++) { writel(msm_irq_shadow_reg[i].int_type, VIC_INT_TYPE0 + i * 4); writel(msm_irq_shadow_reg[i].int_polarity, VIC_INT_POLARITY0 + i * 4); writel(msm_irq_shadow_reg[i].int_en[0], VIC_INT_EN0 + i * 4); @@ -527,16 +451,20 @@ void __init msm_init_irq(void) unsigned n; /* select level interrupts */ - msm_irq_write_all_regs(VIC_INT_TYPE0, 0); + writel(0, VIC_INT_TYPE0); + writel(0, VIC_INT_TYPE1); /* select highlevel interrupts */ - msm_irq_write_all_regs(VIC_INT_POLARITY0, 0); + writel(0, VIC_INT_POLARITY0); + writel(0, VIC_INT_POLARITY1); /* select IRQ for all INTs */ - msm_irq_write_all_regs(VIC_INT_SELECT0, 0); + writel(0, VIC_INT_SELECT0); + writel(0, VIC_INT_SELECT1); /* disable all INTs */ - msm_irq_write_all_regs(VIC_INT_EN0, 0); + writel(0, VIC_INT_EN0); + writel(0, VIC_INT_EN1); /* don't use 1136 vic */ writel(0, VIC_CONFIG); @@ -565,7 +493,7 @@ late_initcall(msm_init_irq_late); #if defined(CONFIG_MSM_FIQ_SUPPORT) void msm_trigger_irq(int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_SOFTINT0, irq); + void __iomem *reg = VIC_SOFTINT0 + ((irq & 32) ? 4 : 0); uint32_t mask = 1UL << (irq & 31); writel(mask, reg); } @@ -588,8 +516,8 @@ void msm_fiq_disable(int irq) static void _msm_fiq_select(int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_INT_SELECT0, irq); - unsigned index = VIC_INT_TO_REG_INDEX(irq); + void __iomem *reg = VIC_INT_SELECT0 + ((irq & 32) ? 
4 : 0); + unsigned index = (irq >> 5) & 1; uint32_t mask = 1UL << (irq & 31); unsigned long flags; @@ -601,8 +529,8 @@ static void _msm_fiq_select(int irq) static void _msm_fiq_unselect(int irq) { - void __iomem *reg = VIC_INT_TO_REG_ADDR(VIC_INT_SELECT0, irq); - unsigned index = VIC_INT_TO_REG_INDEX(irq); + void __iomem *reg = VIC_INT_SELECT0 + ((irq & 32) ? 4 : 0); + unsigned index = (irq >> 5) & 1; uint32_t mask = 1UL << (irq & 31); unsigned long flags; diff --git a/arch/arm/mach-msm/proc_comm.c b/arch/arm/mach-msm/proc_comm.c index d2870bdf..85b75502 100644 --- a/arch/arm/mach-msm/proc_comm.c +++ b/arch/arm/mach-msm/proc_comm.c @@ -1,6 +1,7 @@ /* arch/arm/mach-msm/proc_comm.c * * Copyright (C) 2007-2008 Google, Inc. + * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. * Author: Brian Swetland * * This software is licensed under the terms of the GNU General Public @@ -18,24 +19,24 @@ #include #include #include +#include #include #include #include "proc_comm.h" +#include "smd_private.h" #if defined(CONFIG_ARCH_MSM7X30) -#define MSM_TRIG_A2M_INT(n) (writel(1 << n, MSM_GCC_BASE + 0x8)) +#define MSM_TRIG_A2M_PC_INT (writel(1 << 6, MSM_GCC_BASE + 0x8)) +#elif defined(CONFIG_ARCH_MSM8X60) +#define MSM_TRIG_A2M_PC_INT (writel(1 << 5, MSM_GCC_BASE + 0x8)) +#else +#define MSM_TRIG_A2M_PC_INT (writel(1, MSM_CSR_BASE + 0x400 + (6) * 4)) #endif -#define MSM_A2M_INT(n) (MSM_CSR_BASE + 0x400 + (n) * 4) - static inline void notify_other_proc_comm(void) { -#if defined(CONFIG_ARCH_MSM7X30) - MSM_TRIG_A2M_INT(6); -#else - writel(1, MSM_A2M_INT(6)); -#endif + MSM_TRIG_A2M_PC_INT; } #define APP_COMMAND 0x00 @@ -50,69 +51,84 @@ static inline void notify_other_proc_comm(void) static DEFINE_SPINLOCK(proc_comm_lock); -/* The higher level SMD support will install this to - * provide a way to check for and handle modem restart. - */ -int (*msm_check_for_modem_crash)(void); - /* Poll for a state change, checking for possible * modem crashes along the way (so we don't wait - * forever while the ARM9 is blowing up). + * forever while the ARM9 is blowing up. * * Return an error in the event of a modem crash and * restart so the msm_proc_comm() routine can restart * the operation from the beginning. */ -static int proc_comm_wait_for(void __iomem *addr, unsigned value) +static int proc_comm_wait_for(unsigned addr, unsigned value) { - for (;;) { + while (1) { if (readl(addr) == value) return 0; - if (msm_check_for_modem_crash) - if (msm_check_for_modem_crash()) - return -EAGAIN; + if (smsm_check_for_modem_crash()) + return -EAGAIN; + + udelay(5); } } +void msm_proc_comm_reset_modem_now(void) +{ + unsigned base = (unsigned)MSM_SHARED_RAM_BASE; + unsigned long flags; + + spin_lock_irqsave(&proc_comm_lock, flags); + +again: + if (proc_comm_wait_for(base + MDM_STATUS, PCOM_READY)) + goto again; + + writel(PCOM_RESET_MODEM, base + APP_COMMAND); + writel(0, base + APP_DATA1); + writel(0, base + APP_DATA2); + + spin_unlock_irqrestore(&proc_comm_lock, flags); + + notify_other_proc_comm(); + + return; +} +EXPORT_SYMBOL(msm_proc_comm_reset_modem_now); + int msm_proc_comm(unsigned cmd, unsigned *data1, unsigned *data2) { - void __iomem *base = MSM_SHARED_RAM_BASE; + unsigned base = (unsigned)MSM_SHARED_RAM_BASE; unsigned long flags; int ret; spin_lock_irqsave(&proc_comm_lock, flags); - for (;;) { - if (proc_comm_wait_for(base + MDM_STATUS, PCOM_READY)) - continue; +again: + if (proc_comm_wait_for(base + MDM_STATUS, PCOM_READY)) + goto again; - writel(cmd, base + APP_COMMAND); - writel(data1 ? 
*data1 : 0, base + APP_DATA1); - writel(data2 ? *data2 : 0, base + APP_DATA2); + writel(cmd, base + APP_COMMAND); + writel(data1 ? *data1 : 0, base + APP_DATA1); + writel(data2 ? *data2 : 0, base + APP_DATA2); - notify_other_proc_comm(); + notify_other_proc_comm(); - if (proc_comm_wait_for(base + APP_COMMAND, PCOM_CMD_DONE)) - continue; + if (proc_comm_wait_for(base + APP_COMMAND, PCOM_CMD_DONE)) + goto again; - if (readl(base + APP_STATUS) != PCOM_CMD_FAIL) { - if (data1) - *data1 = readl(base + APP_DATA1); - if (data2) - *data2 = readl(base + APP_DATA2); - ret = 0; - } else { - ret = -EIO; - } - break; + if (readl(base + APP_STATUS) == PCOM_CMD_SUCCESS) { + if (data1) + *data1 = readl(base + APP_DATA1); + if (data2) + *data2 = readl(base + APP_DATA2); + ret = 0; + } else { + ret = -EIO; } writel(PCOM_CMD_IDLE, base + APP_COMMAND); spin_unlock_irqrestore(&proc_comm_lock, flags); - return ret; } - - +EXPORT_SYMBOL(msm_proc_comm); diff --git a/arch/arm/mach-msm/proc_comm.h b/arch/arm/mach-msm/proc_comm.h index c9269c7c..4d5bee01 100644 --- a/arch/arm/mach-msm/proc_comm.h +++ b/arch/arm/mach-msm/proc_comm.h @@ -1,6 +1,6 @@ /* arch/arm/mach-msm/proc_comm.h * - * Copyright (c) 2007 QUALCOMM Incorporated + * Copyright (c) 2007-2009, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -179,7 +179,18 @@ enum { PCOM_CLKCTL_RPC_RAIL_DISABLE, PCOM_CLKCTL_RPC_RAIL_CONTROL, PCOM_CLKCTL_RPC_MIN_MSMC1, - PCOM_NUM_CMDS, + PCOM_CLKCTL_RPC_SRC_REQUEST, + PCOM_NPA_INIT, + PCOM_NPA_ISSUE_REQUIRED_REQUEST, +}; + +enum { + PCOM_OEM_FIRST_CMD = 0x10000000, + PCOM_OEM_TEST_CMD = PCOM_OEM_FIRST_CMD, + + /* add OEM PROC COMM commands here */ + + PCOM_OEM_LAST = PCOM_OEM_TEST_CMD, }; enum { @@ -199,7 +210,6 @@ enum { PCOM_CMD_FAIL_SMSM_NOT_INIT, PCOM_CMD_FAIL_PROC_COMM_BUSY, PCOM_CMD_FAIL_PROC_COMM_NOT_INIT, - }; /* List of VREGs that support the Pull Down Resistor setting. */ @@ -294,6 +304,7 @@ enum { (((pull) & 0x3) << 15) | \ (((drvstr) & 0xF) << 17)) +void msm_proc_comm_reset_modem_now(void); int msm_proc_comm(unsigned cmd, unsigned *data1, unsigned *data2); #endif diff --git a/arch/arm/mach-msm/sirc.c b/arch/arm/mach-msm/sirc.c index 50ee773f..2dc3247c 100644 --- a/arch/arm/mach-msm/sirc.c +++ b/arch/arm/mach-msm/sirc.c @@ -1,6 +1,6 @@ /* linux/arch/arm/mach-msm/irq.c * - * Copyright (c) 2009 QUALCOMM Incorporated. + * Copyright (c) 2009-2010 Code Aurora Forum. All rights reserved. * Copyright (C) 2009 Google, Inc. 
* * This software is licensed under the terms of the GNU General Public @@ -189,9 +189,9 @@ static void sirc_irq_handler(unsigned int irq, struct irq_desc *desc) reg++; if (reg == ARRAY_SIZE(sirc_reg_table)) { - printk(KERN_ERR "%s: incorrect irq %d called\n", - __func__, irq); - return; + printk(KERN_ERR "%s: incorrect irq %d called\n", + __func__, irq); + return; } status = readl(sirc_reg_table[reg].int_status); diff --git a/arch/arm/mach-msm/sirc.h b/arch/arm/mach-msm/sirc.h index 8e1399f0..24f3ae80 100644 --- a/arch/arm/mach-msm/sirc.h +++ b/arch/arm/mach-msm/sirc.h @@ -16,12 +16,20 @@ #ifndef _ARCH_ARM_MACH_MSM_SIRC_H #define _ARCH_ARM_MACH_MSM_SIRC_H -#ifdef CONFIG_ARCH_QSD8X50 +#ifdef CONFIG_ARCH_MSM_SCORPION void sirc_fiq_select(int irq, bool enable); -void __init msm_init_sirc(void); #else static inline void sirc_fiq_select(int irq, bool enable) {} +#endif + +#ifdef CONFIG_ARCH_QSD8X50 +void __init msm_init_sirc(void); +void msm_sirc_enter_sleep(void); +void msm_sirc_exit_sleep(void); +#else static inline void __init msm_init_sirc(void) {} +static inline void msm_sirc_enter_sleep(void) { } +static inline void msm_sirc_exit_sleep(void) { } #endif #endif diff --git a/arch/arm/mach-msm/smd.c b/arch/arm/mach-msm/smd.c index 23d67e8d..8a74234a 100644 --- a/arch/arm/mach-msm/smd.c +++ b/arch/arm/mach-msm/smd.c @@ -140,16 +140,18 @@ static void handle_modem_crash(void) ; } -extern int (*msm_check_for_modem_crash)(void); - uint32_t raw_smsm_get_state(enum smsm_state_item item) { return readl(smd_info.state + item * 4); } -static int check_for_modem_crash(void) +int smsm_check_for_modem_crash(void) { - if (raw_smsm_get_state(SMSM_STATE_MODEM) & SMSM_RESET) { + /* if the modem's not ready yet, we have to hope for the best */ + if (!smd_info.state) + return 0; + + if (raw_smsm_get_state(SMSM_MODEM_STATE) & SMSM_RESET) { handle_modem_crash(); return -1; } @@ -1238,8 +1240,6 @@ static int __init msm_smd_probe(struct platform_device *pdev) do_smd_probe(); - msm_check_for_modem_crash = check_for_modem_crash; - msm_init_last_radio_log(THIS_MODULE); smd_initialized = 1; diff --git a/arch/arm/mach-msm/smd_private.h b/arch/arm/mach-msm/smd_private.h index 3dc0a203..91f19338 100644 --- a/arch/arm/mach-msm/smd_private.h +++ b/arch/arm/mach-msm/smd_private.h @@ -1,7 +1,7 @@ /* arch/arm/mach-msm/smd_private.h * * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2007 QUALCOMM Incorporated + * Copyright (c) 2007-2010, Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -16,6 +16,9 @@ #ifndef _ARCH_ARM_MACH_MSM_MSM_SMD_PRIVATE_H_ #define _ARCH_ARM_MACH_MSM_MSM_SMD_PRIVATE_H_ +#include +#include + struct smem_heap_info { unsigned initialized; unsigned free_offset; @@ -46,12 +49,15 @@ struct smem_proc_comm { #define VERSION_MODEM_SBL 7 #define VERSION_APPS 8 #define VERSION_MODEM 9 +#define VERSION_DSPS 10 + +#define SMD_HEAP_SIZE 512 struct smem_shared { struct smem_proc_comm proc_comm[4]; unsigned version[32]; struct smem_heap_info heap_info; - struct smem_heap_entry heap_toc[512]; + struct smem_heap_entry heap_toc[SMD_HEAP_SIZE]; }; #define SMSM_V1_SIZE (sizeof(unsigned) * 8) @@ -122,35 +128,37 @@ enum { #define ID_SHARED_STATE SMEM_SMSM_SHARED_STATE #define ID_CH_ALLOC_TBL SMEM_CHANNEL_ALLOC_TBL -#define SMSM_INIT 0x00000001 -#define SMSM_OSENTERED 0x00000002 -#define SMSM_SMDWAIT 0x00000004 -#define SMSM_SMDINIT 0x00000008 -#define SMSM_RPCWAIT 0x00000010 -#define SMSM_RPCINIT 0x00000020 -#define SMSM_RESET 0x00000040 -#define SMSM_RSA 0x00000080 -#define SMSM_RUN 0x00000100 -#define SMSM_PWRC 0x00000200 -#define SMSM_TIMEWAIT 0x00000400 -#define SMSM_TIMEINIT 0x00000800 -#define SMSM_PWRC_EARLY_EXIT 0x00001000 -#define SMSM_WFPI 0x00002000 -#define SMSM_SLEEP 0x00004000 -#define SMSM_SLEEPEXIT 0x00008000 -#define SMSM_OEMSBL_RELEASE 0x00010000 -#define SMSM_APPS_REBOOT 0x00020000 -#define SMSM_SYSTEM_POWER_DOWN 0x00040000 -#define SMSM_SYSTEM_REBOOT 0x00080000 -#define SMSM_SYSTEM_DOWNLOAD 0x00100000 -#define SMSM_PWRC_SUSPEND 0x00200000 -#define SMSM_APPS_SHUTDOWN 0x00400000 -#define SMSM_SMD_LOOPBACK 0x00800000 -#define SMSM_RUN_QUIET 0x01000000 -#define SMSM_MODEM_WAIT 0x02000000 -#define SMSM_MODEM_BREAK 0x04000000 -#define SMSM_MODEM_CONTINUE 0x08000000 -#define SMSM_UNKNOWN 0x80000000 +#define SMSM_INIT 0x00000001 +#define SMSM_OSENTERED 0x00000002 +#define SMSM_SMDWAIT 0x00000004 +#define SMSM_SMDINIT 0x00000008 +#define SMSM_RPCWAIT 0x00000010 +#define SMSM_RPCINIT 0x00000020 +#define SMSM_RESET 0x00000040 +#define SMSM_RSA 0x00000080 +#define SMSM_RUN 0x00000100 +#define SMSM_PWRC 0x00000200 +#define SMSM_TIMEWAIT 0x00000400 +#define SMSM_TIMEINIT 0x00000800 +#define SMSM_PWRC_EARLY_EXIT 0x00001000 +#define SMSM_WFPI 0x00002000 +#define SMSM_SLEEP 0x00004000 +#define SMSM_SLEEPEXIT 0x00008000 +#define SMSM_OEMSBL_RELEASE 0x00010000 +#define SMSM_APPS_REBOOT 0x00020000 +#define SMSM_SYSTEM_POWER_DOWN 0x00040000 +#define SMSM_SYSTEM_REBOOT 0x00080000 +#define SMSM_SYSTEM_DOWNLOAD 0x00100000 +#define SMSM_PWRC_SUSPEND 0x00200000 +#define SMSM_APPS_SHUTDOWN 0x00400000 +#define SMSM_SMD_LOOPBACK 0x00800000 +#define SMSM_RUN_QUIET 0x01000000 +#define SMSM_MODEM_WAIT 0x02000000 +#define SMSM_MODEM_BREAK 0x04000000 +#define SMSM_MODEM_CONTINUE 0x08000000 +#define SMSM_SYSTEM_REBOOT_USR 0x20000000 +#define SMSM_SYSTEM_PWRDWN_USR 0x40000000 +#define SMSM_UNKNOWN 0x80000000 #define SMSM_WKUP_REASON_RPC 0x00000001 #define SMSM_WKUP_REASON_INT 0x00000002 @@ -278,18 +286,17 @@ typedef enum { } smem_mem_type; -#define SMD_SS_CLOSED 0x00000000 -#define SMD_SS_OPENING 0x00000001 -#define SMD_SS_OPENED 0x00000002 -#define SMD_SS_FLUSHING 0x00000003 -#define SMD_SS_CLOSING 0x00000004 -#define SMD_SS_RESET 0x00000005 -#define SMD_SS_RESET_OPENING 0x00000006 +#define SMD_SS_CLOSED 0x00000000 +#define SMD_SS_OPENING 0x00000001 +#define SMD_SS_OPENED 0x00000002 +#define SMD_SS_FLUSHING 0x00000003 +#define SMD_SS_CLOSING 
0x00000004 +#define SMD_SS_RESET 0x00000005 +#define SMD_SS_RESET_OPENING 0x00000006 -#define SMD_BUF_SIZE 8192 -#define SMD_CHANNELS 64 - -#define SMD_HEADER_SIZE 20 +#define SMD_BUF_SIZE 8192 +#define SMD_CHANNELS 64 +#define SMD_HEADER_SIZE 20 #define SMD_TYPE_MASK 0x0FF #define SMD_TYPE_APPS_MODEM 0x000 @@ -301,6 +308,8 @@ typedef enum { #define SMD_KIND_STREAM 0x100 #define SMD_KIND_PACKET 0x200 +int smsm_check_for_modem_crash(void); +#define msm_check_for_modem_crash smsm_check_for_modem_crash void *smem_find(unsigned id, unsigned size); void *smem_item(unsigned id, unsigned *size); uint32_t raw_smsm_get_state(enum smsm_state_item item); diff --git a/drivers/misc/pmem.c b/drivers/misc/pmem.c index 9e9fb56d..f1523fd5 100755 --- a/drivers/misc/pmem.c +++ b/drivers/misc/pmem.c @@ -1,4 +1,3 @@ -#ifdef CONFIG_MSM_KGSL /* drivers/android/pmem.c * * Copyright (C) 2007 Google, Inc. @@ -15,8 +14,6 @@ * */ - - #include #include #include @@ -36,7 +33,6 @@ #include #include #include -#include #define PMEM_MAX_USER_SPACE_DEVICES (10) #define PMEM_MAX_KERNEL_SPACE_DEVICES (2) @@ -57,6 +53,8 @@ #define PMEM_DEBUG 0 #endif +#define SYSTEM_ALLOC_RETRY 10 + /* indicates that a refernce to this file has been taken via get_pmem_file, * the file should not be released until put_pmem_file is called */ #define PMEM_FLAGS_BUSY 0x1 @@ -143,6 +141,14 @@ unsigned long unstable_pmem_start; /* size of unstable PMEM physical memory */ unsigned long unstable_pmem_size; +struct alloc_list { + void *addr; /* physical addr of allocation */ + void *aaddr; /* aligned physical addr */ + unsigned int size; /* total size of allocation */ + unsigned char __iomem *vaddr; /* Virtual addr */ + struct list_head allocs; +}; + struct pmem_info { struct miscdevice dev; /* physical start address of the remaped pmem space */ @@ -207,6 +213,11 @@ struct pmem_info { unsigned short quanta; } *bitm_alloc; } bitmap; + + struct { + unsigned long used; /* Bytes currently allocated */ + struct list_head alist; /* List of allocations */ + } system_mem; } allocator; int id; @@ -313,28 +324,7 @@ static struct pmem_attr pmem_attr_## name = \ #define WO_PMEM_ATTR(name) \ static struct pmem_attr pmem_attr_## name = \ PMEM_ATTR(name, S_IWUSR, NULL, store_pmem_## name) -/*HTC_START*/ -static struct dentry *root = NULL; -u32 misc_msg_pmem_qcom = 0; -static struct dentry *vidc_debugfs_root; - -static struct dentry *vidc_get_debugfs_root(void) -{ - if (vidc_debugfs_root == NULL) - vidc_debugfs_root = debugfs_create_dir("misc", NULL); - return vidc_debugfs_root; -} - -static void vidc_debugfs_file_create(struct dentry *root, const char *name, - u32 *var) -{ - struct dentry *vidc_debugfs_file = - debugfs_create_u32(name, S_IRUGO | S_IWUSR, root, var); - if (!vidc_debugfs_file) - pr_info("%s(): Error creating/opening file %s\n", __func__, name); -} -/*HTC_END*/ static ssize_t show_pmem(struct kobject *kobj, struct attribute *attr, char *buf) @@ -378,6 +368,8 @@ static ssize_t show_pmem_allocator_type(int id, char *buf) return scnprintf(buf, PAGE_SIZE, "%s\n", "Buddy Bestfit"); case PMEM_ALLOCATORTYPE_BITMAP: return scnprintf(buf, PAGE_SIZE, "%s\n", "Bitmap"); + case PMEM_ALLOCATORTYPE_SYSTEM: + return scnprintf(buf, PAGE_SIZE, "%s\n", "System heap"); default: return scnprintf(buf, PAGE_SIZE, "??? Invalid allocator type (%d) for this region! 
" @@ -552,11 +544,22 @@ static struct attribute *pmem_bitmap_attrs[] = { NULL }; +static struct attribute *pmem_system_attrs[] = { + PMEM_COMMON_SYSFS_ATTRS, + + NULL +}; + static struct kobj_type pmem_bitmap_ktype = { .sysfs_ops = &pmem_ops, .default_attrs = pmem_bitmap_attrs, }; +static struct kobj_type pmem_system_ktype = { + .sysfs_ops = &pmem_ops, + .default_attrs = pmem_system_attrs, +}; + static int get_id(struct file *file) { return MINOR(file->f_dentry->d_inode->i_rdev); @@ -568,7 +571,7 @@ static char *get_name(struct file *file) return pmem[id].name; } -int is_pmem_file(struct file *file) +static int is_pmem_file(struct file *file) { int id; @@ -588,7 +591,7 @@ static int has_allocation(struct file *file) * means that file is guaranteed not to be NULL upon entry!! * check is_pmem_file first if not accessed via pmem_file_ops */ struct pmem_data *pdata = file->private_data; - return pdata && pdata->index >= 0; + return pdata && pdata->index != -1; } static int is_master_owner(struct file *file) @@ -604,7 +607,8 @@ static int is_master_owner(struct file *file) master_file = fget_light(data->master_fd, &put_needed); if (master_file && data->master_file == master_file) ret = 1; - fput_light(master_file, put_needed); + if (master_file) + fput_light(master_file, put_needed); return ret; } @@ -730,10 +734,9 @@ static int pmem_free_bitmap(int id, int bitnum) /* caller should hold the lock on arena_mutex! */ int i; char currtask_name[FIELD_SIZEOF(struct task_struct, comm) + 1]; -/*HTC_START*/ - if (misc_msg_pmem_qcom) - pr_info("[PME][%s] pmem_free_bitmap, bitnum %d\n", pmem[id].name, bitnum); -/*HTC_END*/ + + DLOG("bitnum %d\n", bitnum); + for (i = 0; i < pmem[id].allocator.bitmap.bitmap_allocs; i++) { const int curr_bit = pmem[id].allocator.bitmap.bitm_alloc[i].bit; @@ -746,6 +749,7 @@ static int pmem_free_bitmap(int id, int bitnum) curr_bit, curr_bit + curr_quanta); pmem[id].allocator.bitmap.bitmap_free += curr_quanta; pmem[id].allocator.bitmap.bitm_alloc[i].bit = -1; + pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0; return 0; } } @@ -756,6 +760,27 @@ static int pmem_free_bitmap(int id, int bitnum) return -1; } +static int pmem_free_system(int id, int index) +{ + /* caller should hold the lock on arena_mutex! */ + struct alloc_list *item; + + DLOG("index %d\n", index); + if (index != 0) + item = (struct alloc_list *)index; + else + return 0; + + if (item->vaddr != NULL) { + iounmap(item->vaddr); + kfree(__va(item->addr)); + list_del(&item->allocs); + kfree(item); + } + + return 0; +} + static int pmem_free_space_bitmap(int id, struct pmem_freespace *fs) { int i, j; @@ -778,12 +803,12 @@ static int pmem_free_space_bitmap(int id, struct pmem_freespace *fs) const int curr_alloc = pmem[id].allocator. bitmap.bitm_alloc[j].bit; if (curr_alloc != -1) { + if (alloc_start == curr_alloc) + alloc_idx = j; if (alloc_start >= curr_alloc) continue; - if (curr_alloc < next_alloc) { + if (curr_alloc < next_alloc) next_alloc = curr_alloc; - alloc_idx = j; - } } } alloc_quanta = pmem[id].allocator.bitmap. 
@@ -804,6 +829,14 @@ static int pmem_free_space_bitmap(int id, struct pmem_freespace *fs) return 0; } +static int pmem_free_space_system(int id, struct pmem_freespace *fs) +{ + fs->total = pmem[id].size; + fs->largest = pmem[id].size; + + return 0; +} + static void pmem_revoke(struct file *file, struct pmem_data *data); static int pmem_release(struct inode *inode, struct file *file) @@ -888,10 +921,6 @@ static int pmem_open(struct inode *inode, struct file *file) DLOG("pid %u(%s) file %p(%ld) dev %s(id: %d)\n", current->pid, get_task_comm(currtask_name, current), file, file_count(file), get_name(file), id); - /* setup file->private_data to indicate its unmapped */ - /* you can only open a pmem device one time */ - if (file->private_data != NULL) - return -EINVAL; data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL); if (!data) { printk(KERN_ALERT "pmem: %s: unable to allocate memory for " @@ -1153,35 +1182,28 @@ static int pmem_allocator_bitmap(const int id, /* caller should hold the lock on arena_mutex! */ int bitnum, i; unsigned int quanta_needed; -/*HTC_START*/ - if (misc_msg_pmem_qcom) - pr_info("[PME][%s] pmem_allocator_bitmap, len %ld\n", pmem[id].name, len); + DLOG("bitmap id %d, len %ld, align %u\n", id, len, align); if (!pmem[id].allocator.bitmap.bitm_alloc) { - if (misc_msg_pmem_qcom) { - printk(KERN_ALERT "[PME][%s] bitm_alloc not present! \n", - pmem[id].name); - } -/*HTC_END*/ - bitnum = -1; goto leave; +#if PMEM_DEBUG + printk(KERN_ALERT "pmem: bitm_alloc not present! id: %d\n", + id); +#endif + return -1; } quanta_needed = (len + pmem[id].quantum - 1) / pmem[id].quantum; -/*HTC_START*/ - if (misc_msg_pmem_qcom) { - pr_info("[PME][%s] quantum size %u quanta needed %u free %u\n", - pmem[id].name, pmem[id].quantum, quanta_needed, - pmem[id].allocator.bitmap.bitmap_free); - } + DLOG("quantum size %u quanta needed %u free %u id %d\n", + pmem[id].quantum, quanta_needed, + pmem[id].allocator.bitmap.bitmap_free, id); if (pmem[id].allocator.bitmap.bitmap_free < quanta_needed) { - if (misc_msg_pmem_qcom) { - printk(KERN_ALERT "[PME][%s] memory allocation failure. " - "PMEM memory region exhausted." - " Unable to comply with allocation request.\n", pmem[id].name); - } -/*HTC_END*/ - bitnum = -1; goto leave; +#if PMEM_DEBUG + printk(KERN_ALERT "pmem: memory allocation failure. " + "PMEM memory region exhausted, id %d." + " Unable to comply with allocation request.\n", id); +#endif + return -1; } bitnum = reserve_quanta(quanta_needed, id, align); @@ -1201,36 +1223,35 @@ static int pmem_allocator_bitmap(const int id, int j; if (!new_bitmap_allocs) { /* failed sanity check!! */ -/*HTC_START*/ - if (misc_msg_pmem_qcom) { - pr_alert("[PME][%s] pmem: bitmap_allocs number" - " wrapped around to zero! Something " - "is VERY wrong.\n", pmem[id].name); - } - bitnum = -1; goto leave; +#if PMEM_DEBUG + pr_alert("pmem: bitmap_allocs number" + " wrapped around to zero! Something " + "is VERY wrong.\n"); +#endif + return -1; } + if (new_bitmap_allocs > pmem[id].num_entries) { /* failed sanity check!! 
*/ - if (misc_msg_pmem_qcom) { - pr_alert("[PME][%s] pmem: required bitmap_allocs" - " number exceeds maximum entries possible" - " for current quanta\n", pmem[id].name); - } - - bitnum = -1; goto leave; +#if PMEM_DEBUG + pr_alert("pmem: required bitmap_allocs" + " number exceeds maximum entries possible" + " for current quanta\n"); +#endif + return -1; } + temp = krealloc(pmem[id].allocator.bitmap.bitm_alloc, new_bitmap_allocs * sizeof(*pmem[id].allocator.bitmap.bitm_alloc), GFP_KERNEL); if (!temp) { - if (misc_msg_pmem_qcom) { - pr_alert("[PME][%s] can't realloc bitmap_allocs," - " current num bitmap allocs %d\n", - pmem[id].name, pmem[id].allocator.bitmap.bitmap_allocs); - } -/*HTC_END*/ - bitnum = -1; goto leave; +#if PMEM_DEBUG + pr_alert("pmem: can't realloc bitmap_allocs," + "id %d, current num bitmap allocs %d\n", + id, pmem[id].allocator.bitmap.bitmap_allocs); +#endif + return -1; } pmem[id].allocator.bitmap.bitmap_allocs = new_bitmap_allocs; pmem[id].allocator.bitmap.bitm_alloc = temp; @@ -1239,41 +1260,79 @@ static int pmem_allocator_bitmap(const int id, pmem[id].allocator.bitmap.bitm_alloc[j].bit = -1; pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0; } -/*HTC_START*/ - if (misc_msg_pmem_qcom) { - pr_info("[PME][%s] increased # of allocated regions to %d for \n", - pmem[id].name, pmem[id].allocator.bitmap.bitmap_allocs); - } + + DLOG("increased # of allocated regions to %d for id %d\n", + pmem[id].allocator.bitmap.bitmap_allocs, id); } - if (misc_msg_pmem_qcom) - pr_info("[PME][%s] bitnum %d, bitm_alloc index %d\n", pmem[id].name, bitnum, i); -/*HTC_END*/ + + DLOG("bitnum %d, bitm_alloc index %d\n", bitnum, i); + pmem[id].allocator.bitmap.bitmap_free -= quanta_needed; pmem[id].allocator.bitmap.bitm_alloc[i].bit = bitnum; pmem[id].allocator.bitmap.bitm_alloc[i].quanta = quanta_needed; leave: - if (-1 == bitnum) { - pr_err("[PME][%s] error: pmem_allocator_bitmap failed\n", pmem[id].name); - for (i = 0; i < pmem[id].allocator.bitmap.bitmap_allocs; i++) { - if (pmem[id].allocator.bitmap.bitm_alloc[i].bit != -1) { - /*HTC_START*/ - if (misc_msg_pmem_qcom) { - pr_info("[PME][%s] bitm_alloc[%d].bit: %u bitm_alloc[%d].quanta: %u\n", - pmem[id].name, - i, - pmem[id].allocator.bitmap.bitm_alloc[i].bit, - i, - pmem[id].allocator.bitmap.bitm_alloc[i].quanta - ); - } - /*HTC_END*/ - } - } - } return bitnum; } -static pgprot_t pmem_phys_mem_access_prot(struct file *file, pgprot_t vma_prot) +static int pmem_allocator_system(const int id, + const unsigned long len, + const unsigned int align) +{ + /* caller should hold the lock on arena_mutex! 
*/ + struct alloc_list *list; + unsigned long aligned_len; + int count = SYSTEM_ALLOC_RETRY; + void *buf; + + DLOG("system id %d, len %ld, align %u\n", id, len, align); + + if ((pmem[id].allocator.system_mem.used + len) > pmem[id].size) { + DLOG("requested size would be larger than quota\n"); + return -1; + } + + /* Handle alignment */ + aligned_len = len + align; + + /* Attempt allocation */ + list = kmalloc(sizeof(struct alloc_list), GFP_KERNEL); + if (list == NULL) { + printk(KERN_ERR "pmem: failed to allocate system metadata\n"); + return -1; + } + list->vaddr = NULL; + + buf = NULL; + while ((buf == NULL) && count--) { + buf = kmalloc((aligned_len), GFP_KERNEL); + if (buf == NULL) { + DLOG("pmem: kmalloc %d temporarily failed len= %ld\n", + count, aligned_len); + } + } + if (!buf) { + printk(KERN_CRIT "pmem: kmalloc failed for id= %d len= %ld\n", + id, aligned_len); + kfree(list); + return -1; + } + list->size = aligned_len; + list->addr = (void *)__pa(buf); + list->aaddr = (void *)(((unsigned int)(list->addr) + (align - 1)) & + ~(align - 1)); + + if (!pmem[id].cached) + list->vaddr = ioremap(__pa(buf), aligned_len); + else + list->vaddr = ioremap_cached(__pa(buf), aligned_len); + + INIT_LIST_HEAD(&list->allocs); + list_add(&list->allocs, &pmem[id].allocator.system_mem.alist); + + return (int)list; +} + +static pgprot_t phys_mem_access_prot(struct file *file, pgprot_t vma_prot) { int id = get_id(file); #ifdef pgprot_writecombine @@ -1305,8 +1364,16 @@ static unsigned long pmem_start_addr_bitmap(int id, struct pmem_data *data) return data->index * pmem[id].quantum + pmem[id].base; } +static unsigned long pmem_start_addr_system(int id, struct pmem_data *data) +{ + return (unsigned long)(((struct alloc_list *)(data->index))->aaddr); +} + static void *pmem_start_vaddr(int id, struct pmem_data *data) { + if (pmem[id].allocator_type == PMEM_ALLOCATORTYPE_SYSTEM) + return ((struct alloc_list *)(data->index))->vaddr; + else return pmem[id].start_addr(id, data) - pmem[id].base + pmem[id].vbase; } @@ -1344,6 +1411,18 @@ static unsigned long pmem_len_bitmap(int id, struct pmem_data *data) return ret; } +static unsigned long pmem_len_system(int id, struct pmem_data *data) +{ + unsigned long ret = 0; + + mutex_lock(&pmem[id].arena_mutex); + + ret = ((struct alloc_list *)data->index)->size; + mutex_unlock(&pmem[id].arena_mutex); + + return ret; +} + static int pmem_map_garbage(int id, struct vm_area_struct *vma, struct pmem_data *data, unsigned long offset, unsigned long len) @@ -1481,6 +1560,10 @@ static int pmem_mmap(struct file *file, struct vm_area_struct *vma) unsigned long vma_size = vma->vm_end - vma->vm_start; int ret = 0, id = get_id(file); + if (!data) { + pr_err("pmem: Invalid file descriptor, no private data\n"); + return -EINVAL; + } #if PMEM_DEBUG_MSGS char currtask_name[FIELD_SIZEOF(struct task_struct, comm) + 1]; #endif @@ -1509,24 +1592,21 @@ static int pmem_mmap(struct file *file, struct vm_area_struct *vma) goto error; } /* if file->private_data == unalloced, alloc*/ - if (data && data->index == -1) { + if (data->index == -1) { mutex_lock(&pmem[id].arena_mutex); index = pmem[id].allocate(id, vma->vm_end - vma->vm_start, SZ_4K); mutex_unlock(&pmem[id].arena_mutex); - data->index = index; - if (data->index < 0) { + /* either no space was available or an error occured */ + if (index == -1) { pr_err("pmem: mmap unable to allocate memory" "on %s\n", get_name(file)); + ret = -ENOMEM; + goto error; } - } - - /* either no space was available or an error occured */ - if 
(!has_allocation(file)) { - ret = -ENOMEM; - pr_err("pmem: could not find allocation for map.\n"); - goto error; + /* store the index of a successful allocation */ + data->index = index; } if (pmem[id].len(id, data) < vma_size) { @@ -1541,7 +1621,7 @@ static int pmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_pgoff = pmem[id].start_addr(id, data) >> PAGE_SHIFT; - vma->vm_page_prot = pmem_phys_mem_access_prot(file, vma->vm_page_prot); + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_page_prot); if (data->flags & PMEM_FLAGS_CONNECTED) { struct pmem_region_node *region_node; @@ -1770,6 +1850,21 @@ void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len) goto end; vaddr = pmem_start_vaddr(id, data); + + if (pmem[id].allocator_type == PMEM_ALLOCATORTYPE_SYSTEM) { + dmac_flush_range(vaddr, + (void *)((unsigned long)vaddr + + ((struct alloc_list *)(data->index))->size)); +#ifdef CONFIG_OUTER_CACHE + phy_start = pmem_start_addr_system(id, data); + + phy_end = phy_start + + ((struct alloc_list *)(data->index))->size; + + outer_flush_range(phy_start, phy_end); +#endif + goto end; + } /* if this isn't a submmapped file, flush the whole thing */ if (unlikely(!(data->flags & PMEM_FLAGS_CONNECTED))) { dmac_flush_range(vaddr, vaddr + pmem[id].len(id, data)); @@ -1958,6 +2053,11 @@ static int pmem_kapi_free_index_bitmap(const int32_t physaddr, int id) bit_from_paddr(id, physaddr) : -1; } +static int pmem_kapi_free_index_system(const int32_t physaddr, int id) +{ + return 0; +} + int pmem_kfree(const int32_t physaddr) { int i; @@ -2503,6 +2603,17 @@ static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return pmem_cache_maint(file, cmd, &pmem_addr); } + case PMEM_CACHE_FLUSH: + { + struct pmem_region region; + + if (copy_from_user(®ion, (void __user *)arg, + sizeof(struct pmem_region))) + return -EFAULT; + + flush_pmem_file(file, region.offset, region.len); + break; + } default: if (pmem[id].ioctl) return pmem[id].ioctl(file, cmd, arg); @@ -2891,6 +3002,35 @@ int pmem_setup(struct android_pmem_platform_data *pdata, pmem[id].size, pmem[id].quantum); break; + case PMEM_ALLOCATORTYPE_SYSTEM: + +#ifdef CONFIG_MEMORY_HOTPLUG + goto err_no_mem; +#endif + + INIT_LIST_HEAD(&pmem[id].allocator.system_mem.alist); + + pmem[id].allocator.system_mem.used = 0; + pmem[id].vbase = NULL; + + if (kobject_init_and_add(&pmem[id].kobj, + &pmem_system_ktype, NULL, + "%s", pdata->name)) + goto out_put_kobj; + + pmem[id].allocate = pmem_allocator_system; + pmem[id].free = pmem_free_system; + pmem[id].free_space = pmem_free_space_system; + pmem[id].kapi_free_index = pmem_kapi_free_index_system; + pmem[id].len = pmem_len_system; + pmem[id].start_addr = pmem_start_addr_system; + pmem[id].num_entries = 0; + pmem[id].quantum = PAGE_SIZE; + + DLOG("system allocator id %d (%s), raw size %lu\n", + id, pdata->name, pmem[id].size); + break; + default: pr_alert("Invalid allocator type (%d) for pmem driver\n", pdata->allocator_type); @@ -2923,7 +3063,8 @@ int pmem_setup(struct android_pmem_platform_data *pdata, if (pmem[id].memory_state == MEMORY_UNSTABLE_NO_MEMORY_ALLOCATED) return 0; - if (!is_kernel_memtype) { + if ((!is_kernel_memtype) && + (pmem[id].allocator_type != PMEM_ALLOCATORTYPE_SYSTEM)) { ioremap_pmem(id); if (pmem[id].vbase == 0) { pr_err("pmem: ioremap failed for device %s\n", @@ -3017,14 +3158,7 @@ static int __init pmem_init(void) pr_err("pmem(%s):kset_create_and_add fail\n", __func__); return -ENOMEM; } -/*HTC_START*/ -root = vidc_get_debugfs_root(); - 
if (root) { - vidc_debugfs_file_create(root, "misc_msg_pmem_qcom", - (u32 *) &misc_msg_pmem_qcom); - } -/*HTC_END*/ #ifdef CONFIG_MEMORY_HOTPLUG hotplug_memory_notifier(pmem_memory_callback, 0); #endif @@ -3038,1352 +3172,4 @@ static void __exit pmem_exit(void) module_init(pmem_init); module_exit(pmem_exit); -#else -/* drivers/android/pmem.c - * - * Copyright (C) 2007 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PMEM_MAX_DEVICES 10 -#define PMEM_MAX_ORDER 128 -#define PMEM_MIN_ALLOC PAGE_SIZE - -#define PMEM_DEBUG 1 - -/* indicates that a refernce to this file has been taken via get_pmem_file, - * the file should not be released until put_pmem_file is called */ -#define PMEM_FLAGS_BUSY 0x1 -/* indicates that this is a suballocation of a larger master range */ -#define PMEM_FLAGS_CONNECTED 0x1 << 1 -/* indicates this is a master and not a sub allocation and that it is mmaped */ -#define PMEM_FLAGS_MASTERMAP 0x1 << 2 -/* submap and unsubmap flags indicate: - * 00: subregion has never been mmaped - * 10: subregion has been mmaped, reference to the mm was taken - * 11: subretion has ben released, refernece to the mm still held - * 01: subretion has been released, reference to the mm has been released - */ -#define PMEM_FLAGS_SUBMAP 0x1 << 3 -#define PMEM_FLAGS_UNSUBMAP 0x1 << 4 - - -struct pmem_data { - /* in alloc mode: an index into the bitmap - * in no_alloc mode: the size of the allocation */ - int index; - /* see flags above for descriptions */ - unsigned int flags; - /* protects this data field, if the mm_mmap sem will be held at the - * same time as this sem, the mm sem must be taken first (as this is - * the order for vma_open and vma_close ops */ - struct rw_semaphore sem; - /* info about the mmaping process */ - struct vm_area_struct *vma; - /* task struct of the mapping process */ - struct task_struct *task; - /* process id of teh mapping process */ - pid_t pid; - /* file descriptor of the master */ - int master_fd; - /* file struct of the master */ - struct file *master_file; - /* a list of currently available regions if this is a suballocation */ - struct list_head region_list; - /* a linked list of data so we can access them for debugging */ - struct list_head list; -#if PMEM_DEBUG - int ref; -#endif -}; - -struct pmem_bits { - unsigned allocated:1; /* 1 if allocated, 0 if free */ - unsigned order:7; /* size of the region in pmem space */ -}; - -struct pmem_region_node { - struct pmem_region region; - struct list_head list; -}; - -#define PMEM_DEBUG_MSGS 0 -#if PMEM_DEBUG_MSGS -#define DLOG(fmt,args...) \ - do { printk(KERN_INFO "[%s:%s:%d] "fmt, __FILE__, __func__, __LINE__, \ - ##args); } \ - while (0) -#else -#define DLOG(x...) 
do {} while (0) -#endif - -struct pmem_info { - struct miscdevice dev; - /* physical start address of the remaped pmem space */ - unsigned long base; - /* vitual start address of the remaped pmem space */ - unsigned char __iomem *vbase; - /* total size of the pmem space */ - unsigned long size; - /* number of entries in the pmem space */ - unsigned long num_entries; - /* pfn of the garbage page in memory */ - unsigned long garbage_pfn; - /* index of the garbage page in the pmem space */ - int garbage_index; - /* the bitmap for the region indicating which entries are allocated - * and which are free */ - struct pmem_bits *bitmap; - /* indicates the region should not be managed with an allocator */ - unsigned no_allocator; - /* indicates maps of this region should be cached, if a mix of - * cached and uncached is desired, set this and open the device with - * O_SYNC to get an uncached region */ - unsigned cached; - unsigned buffered; - /* in no_allocator mode the first mapper gets the whole space and sets - * this flag */ - unsigned allocated; - /* for debugging, creates a list of pmem file structs, the - * data_list_sem should be taken before pmem_data->sem if both are - * needed */ - struct semaphore data_list_sem; - struct list_head data_list; - /* pmem_sem protects the bitmap array - * a write lock should be held when modifying entries in bitmap - * a read lock should be held when reading data from bits or - * dereferencing a pointer into bitmap - * - * pmem_data->sem protects the pmem data of a particular file - * Many of the function that require the pmem_data->sem have a non- - * locking version for when the caller is already holding that sem. - * - * IF YOU TAKE BOTH LOCKS TAKE THEM IN THIS ORDER: - * down(pmem_data->sem) => down(bitmap_sem) - */ - struct rw_semaphore bitmap_sem; - - long (*ioctl)(struct file *, unsigned int, unsigned long); - int (*release)(struct inode *, struct file *); -}; - -static struct pmem_info pmem[PMEM_MAX_DEVICES]; -static int id_count; - -#define PMEM_IS_FREE(id, index) !(pmem[id].bitmap[index].allocated) -#define PMEM_ORDER(id, index) pmem[id].bitmap[index].order -#define PMEM_BUDDY_INDEX(id, index) (index ^ (1 << PMEM_ORDER(id, index))) -#define PMEM_NEXT_INDEX(id, index) (index + (1 << PMEM_ORDER(id, index))) -#define PMEM_OFFSET(index) (index * PMEM_MIN_ALLOC) -#define PMEM_START_ADDR(id, index) (PMEM_OFFSET(index) + pmem[id].base) -#define PMEM_LEN(id, index) ((1 << PMEM_ORDER(id, index)) * PMEM_MIN_ALLOC) -#define PMEM_END_ADDR(id, index) (PMEM_START_ADDR(id, index) + \ - PMEM_LEN(id, index)) -#define PMEM_START_VADDR(id, index) (PMEM_OFFSET(id, index) + pmem[id].vbase) -#define PMEM_END_VADDR(id, index) (PMEM_START_VADDR(id, index) + \ - PMEM_LEN(id, index)) -#define PMEM_REVOKED(data) (data->flags & PMEM_FLAGS_REVOKED) -#define PMEM_IS_PAGE_ALIGNED(addr) (!((addr) & (~PAGE_MASK))) -#define PMEM_IS_SUBMAP(data) ((data->flags & PMEM_FLAGS_SUBMAP) && \ - (!(data->flags & PMEM_FLAGS_UNSUBMAP))) - -static int pmem_release(struct inode *, struct file *); -static int pmem_mmap(struct file *, struct vm_area_struct *); -static int pmem_open(struct inode *, struct file *); -static long pmem_ioctl(struct file *, unsigned int, unsigned long); - -struct file_operations pmem_fops = { - .release = pmem_release, - .mmap = pmem_mmap, - .open = pmem_open, - .unlocked_ioctl = pmem_ioctl, -}; - -static int get_id(struct file *file) -{ - return MINOR(file->f_dentry->d_inode->i_rdev); -} - -int is_pmem_file(struct file *file) -{ - int id; - - if (unlikely(!file || 
!file->f_dentry || !file->f_dentry->d_inode)) - return 0; - id = get_id(file); - if (unlikely(id >= PMEM_MAX_DEVICES)) - return 0; - if (unlikely(file->f_dentry->d_inode->i_rdev != - MKDEV(MISC_MAJOR, pmem[id].dev.minor))) - return 0; - return 1; -} - -static int has_allocation(struct file *file) -{ - struct pmem_data *data; - /* check is_pmem_file first if not accessed via pmem_file_ops */ - - if (unlikely(!file->private_data)) - return 0; - data = (struct pmem_data *)file->private_data; - if (unlikely(data->index < 0)) - return 0; - return 1; -} - -static int is_master_owner(struct file *file) -{ - struct file *master_file; - struct pmem_data *data; - int put_needed, ret = 0; - - if (!is_pmem_file(file) || !has_allocation(file)) - return 0; - data = (struct pmem_data *)file->private_data; - if (PMEM_FLAGS_MASTERMAP & data->flags) - return 1; - master_file = fget_light(data->master_fd, &put_needed); - if (master_file && data->master_file == master_file) - ret = 1; - fput_light(master_file, put_needed); - return ret; -} - -static int pmem_free(int id, int index) -{ - /* caller should hold the write lock on pmem_sem! */ - int buddy, curr = index; - DLOG("index %d\n", index); - - if (pmem[id].no_allocator) { - pmem[id].allocated = 0; - return 0; - } - /* clean up the bitmap, merging any buddies */ - pmem[id].bitmap[curr].allocated = 0; - /* find a slots buddy Buddy# = Slot# ^ (1 << order) - * if the buddy is also free merge them - * repeat until the buddy is not free or end of the bitmap is reached - */ - do { - buddy = PMEM_BUDDY_INDEX(id, curr); - if (buddy < pmem[id].num_entries && - PMEM_IS_FREE(id, buddy) && - PMEM_ORDER(id, buddy) == PMEM_ORDER(id, curr)) { - PMEM_ORDER(id, buddy)++; - PMEM_ORDER(id, curr)++; - curr = min(buddy, curr); - } else { - break; - } - } while (curr < pmem[id].num_entries); - - return 0; -} - -static void pmem_revoke(struct file *file, struct pmem_data *data); - -static int pmem_release(struct inode *inode, struct file *file) -{ - struct pmem_data *data = (struct pmem_data *)file->private_data; - struct pmem_region_node *region_node; - struct list_head *elt, *elt2; - int id = get_id(file), ret = 0; - - - down(&pmem[id].data_list_sem); - /* if this file is a master, revoke all the memory in the connected - * files */ - if (PMEM_FLAGS_MASTERMAP & data->flags) { - struct pmem_data *sub_data; - list_for_each(elt, &pmem[id].data_list) { - sub_data = list_entry(elt, struct pmem_data, list); - down_read(&sub_data->sem); - if (PMEM_IS_SUBMAP(sub_data) && - file == sub_data->master_file) { - up_read(&sub_data->sem); - pmem_revoke(file, sub_data); - } else - up_read(&sub_data->sem); - } - } - list_del(&data->list); - up(&pmem[id].data_list_sem); - - - down_write(&data->sem); - - /* if its not a conencted file and it has an allocation, free it */ - if (!(PMEM_FLAGS_CONNECTED & data->flags) && has_allocation(file)) { - down_write(&pmem[id].bitmap_sem); - ret = pmem_free(id, data->index); - up_write(&pmem[id].bitmap_sem); - } - - /* if this file is a submap (mapped, connected file), downref the - * task struct */ - if (PMEM_FLAGS_SUBMAP & data->flags) - if (data->task) { - put_task_struct(data->task); - data->task = NULL; - } - - file->private_data = NULL; - - list_for_each_safe(elt, elt2, &data->region_list) { - region_node = list_entry(elt, struct pmem_region_node, list); - list_del(elt); - kfree(region_node); - } - BUG_ON(!list_empty(&data->region_list)); - - up_write(&data->sem); - kfree(data); - if (pmem[id].release) - ret = pmem[id].release(inode, file); - - return 
ret; -} - -static int pmem_open(struct inode *inode, struct file *file) -{ - struct pmem_data *data; - int id = get_id(file); - int ret = 0; - - DLOG("current %u file %p(%d)\n", current->pid, file, file_count(file)); - /* setup file->private_data to indicate its unmapped */ - /* you can only open a pmem device one time */ - if (file->private_data != NULL) - return -1; - data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL); - if (!data) { - printk("pmem: unable to allocate memory for pmem metadata."); - return -1; - } - data->flags = 0; - data->index = -1; - data->task = NULL; - data->vma = NULL; - data->pid = 0; - data->master_file = NULL; -#if PMEM_DEBUG - data->ref = 0; -#endif - INIT_LIST_HEAD(&data->region_list); - init_rwsem(&data->sem); - - file->private_data = data; - INIT_LIST_HEAD(&data->list); - - down(&pmem[id].data_list_sem); - list_add(&data->list, &pmem[id].data_list); - up(&pmem[id].data_list_sem); - return ret; -} - -static unsigned long pmem_order(unsigned long len) -{ - int i; - - len = (len + PMEM_MIN_ALLOC - 1)/PMEM_MIN_ALLOC; - len--; - for (i = 0; i < sizeof(len)*8; i++) - if (len >> i == 0) - break; - return i; -} - -static int pmem_allocate(int id, unsigned long len) -{ - /* caller should hold the write lock on pmem_sem! */ - /* return the corresponding pdata[] entry */ - int curr = 0; - int end = pmem[id].num_entries; - int best_fit = -1; - unsigned long order = pmem_order(len); - - if (pmem[id].no_allocator) { - DLOG("no allocator"); - if ((len > pmem[id].size) || pmem[id].allocated) - return -1; - pmem[id].allocated = 1; - return len; - } - - if (order > PMEM_MAX_ORDER) - return -1; - DLOG("order %lx\n", order); - - /* look through the bitmap: - * if you find a free slot of the correct order use it - * otherwise, use the best fit (smallest with size > order) slot - */ - while (curr < end) { - if (PMEM_IS_FREE(id, curr)) { - if (PMEM_ORDER(id, curr) == (unsigned char)order) { - /* set the not free bit and clear others */ - best_fit = curr; - break; - } - if (PMEM_ORDER(id, curr) > (unsigned char)order && - (best_fit < 0 || - PMEM_ORDER(id, curr) < PMEM_ORDER(id, best_fit))) - best_fit = curr; - } - curr = PMEM_NEXT_INDEX(id, curr); - } - - /* if best_fit < 0, there are no suitable slots, - * return an error - */ - if (best_fit < 0) { - printk("pmem: no space left to allocate!\n"); - return -1; - } - - /* now partition the best fit: - * split the slot into 2 buddies of order - 1 - * repeat until the slot is of the correct order - */ - while (PMEM_ORDER(id, best_fit) > (unsigned char)order) { - int buddy; - PMEM_ORDER(id, best_fit) -= 1; - buddy = PMEM_BUDDY_INDEX(id, best_fit); - PMEM_ORDER(id, buddy) = PMEM_ORDER(id, best_fit); - } - pmem[id].bitmap[best_fit].allocated = 1; - return best_fit; -} - -static pgprot_t phys_mem_access_prot(struct file *file, pgprot_t vma_prot) -{ - int id = get_id(file); -#ifdef pgprot_noncached - if (pmem[id].cached == 0 || file->f_flags & O_SYNC) - return pgprot_noncached(vma_prot); -#endif -#ifdef pgprot_ext_buffered - else if (pmem[id].buffered) - return pgprot_ext_buffered(vma_prot); -#endif - return vma_prot; -} - -static unsigned long pmem_start_addr(int id, struct pmem_data *data) -{ - if (pmem[id].no_allocator) - return PMEM_START_ADDR(id, 0); - else - return PMEM_START_ADDR(id, data->index); - -} - -static void *pmem_start_vaddr(int id, struct pmem_data *data) -{ - return pmem_start_addr(id, data) - pmem[id].base + pmem[id].vbase; -} - -static unsigned long pmem_len(int id, struct pmem_data *data) -{ - if 
(pmem[id].no_allocator) - return data->index; - else - return PMEM_LEN(id, data->index); -} - -static int pmem_map_garbage(int id, struct vm_area_struct *vma, - struct pmem_data *data, unsigned long offset, - unsigned long len) -{ - int i, garbage_pages = len >> PAGE_SHIFT; - - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP | VM_SHARED | VM_WRITE; - for (i = 0; i < garbage_pages; i++) { - if (vm_insert_pfn(vma, vma->vm_start + offset + (i * PAGE_SIZE), - pmem[id].garbage_pfn)) - return -EAGAIN; - } - return 0; -} - -static int pmem_unmap_pfn_range(int id, struct vm_area_struct *vma, - struct pmem_data *data, unsigned long offset, - unsigned long len) -{ - int garbage_pages; - DLOG("unmap offset %lx len %lx\n", offset, len); - - BUG_ON(!PMEM_IS_PAGE_ALIGNED(len)); - - garbage_pages = len >> PAGE_SHIFT; - zap_page_range(vma, vma->vm_start + offset, len, NULL); - pmem_map_garbage(id, vma, data, offset, len); - return 0; -} - -static int pmem_map_pfn_range(int id, struct vm_area_struct *vma, - struct pmem_data *data, unsigned long offset, - unsigned long len) -{ - DLOG("map offset %lx len %lx\n", offset, len); - BUG_ON(!PMEM_IS_PAGE_ALIGNED(vma->vm_start)); - BUG_ON(!PMEM_IS_PAGE_ALIGNED(vma->vm_end)); - BUG_ON(!PMEM_IS_PAGE_ALIGNED(len)); - BUG_ON(!PMEM_IS_PAGE_ALIGNED(offset)); - - if (io_remap_pfn_range(vma, vma->vm_start + offset, - (pmem_start_addr(id, data) + offset) >> PAGE_SHIFT, - len, vma->vm_page_prot)) { - return -EAGAIN; - } - return 0; -} - -static int pmem_remap_pfn_range(int id, struct vm_area_struct *vma, - struct pmem_data *data, unsigned long offset, - unsigned long len) -{ - /* hold the mm semp for the vma you are modifying when you call this */ - BUG_ON(!vma); - zap_page_range(vma, vma->vm_start + offset, len, NULL); - return pmem_map_pfn_range(id, vma, data, offset, len); -} - -static void pmem_vma_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct pmem_data *data = file->private_data; - int id = get_id(file); - /* this should never be called as we don't support copying pmem - * ranges via fork */ - BUG_ON(!has_allocation(file)); - down_write(&data->sem); - /* remap the garbage pages, forkers don't get access to the data */ - pmem_unmap_pfn_range(id, vma, data, 0, vma->vm_start - vma->vm_end); - up_write(&data->sem); -} - -static void pmem_vma_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct pmem_data *data = file->private_data; - - DLOG("current %u ppid %u file %p count %d\n", current->pid, - current->parent->pid, file, file_count(file)); - if (unlikely(!is_pmem_file(file) || !has_allocation(file))) { - printk(KERN_WARNING "pmem: something is very wrong, you are " - "closing a vm backing an allocation that doesn't " - "exist!\n"); - return; - } - down_write(&data->sem); - if (data->vma == vma) { - data->vma = NULL; - if ((data->flags & PMEM_FLAGS_CONNECTED) && - (data->flags & PMEM_FLAGS_SUBMAP)) - data->flags |= PMEM_FLAGS_UNSUBMAP; - } - /* the kernel is going to free this vma now anyway */ - up_write(&data->sem); -} - -static struct vm_operations_struct vm_ops = { - .open = pmem_vma_open, - .close = pmem_vma_close, -}; - -static int pmem_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct pmem_data *data; - int index; - unsigned long vma_size = vma->vm_end - vma->vm_start; - int ret = 0, id = get_id(file); - - if (vma->vm_pgoff || !PMEM_IS_PAGE_ALIGNED(vma_size)) { -#if PMEM_DEBUG - printk(KERN_ERR "pmem: mmaps must be at offset zero, aligned" - " and a multiple of pages_size.\n"); -#endif - 
return -EINVAL; - } - - data = (struct pmem_data *)file->private_data; - down_write(&data->sem); - /* check this file isn't already mmaped, for submaps check this file - * has never been mmaped */ - if ((data->flags & PMEM_FLAGS_MASTERMAP) || - (data->flags & PMEM_FLAGS_SUBMAP) || - (data->flags & PMEM_FLAGS_UNSUBMAP)) { -#if PMEM_DEBUG - printk(KERN_ERR "pmem: you can only mmap a pmem file once, " - "this file is already mmaped. %x\n", data->flags); -#endif - ret = -EINVAL; - goto error; - } - /* if file->private_data == unalloced, alloc*/ - if (data && data->index == -1) { - down_write(&pmem[id].bitmap_sem); - index = pmem_allocate(id, vma->vm_end - vma->vm_start); - up_write(&pmem[id].bitmap_sem); - data->index = index; - } - /* either no space was available or an error occured */ - if (!has_allocation(file)) { - ret = -EINVAL; - printk("pmem: could not find allocation for map.\n"); - goto error; - } - - if (pmem_len(id, data) < vma_size) { -#if PMEM_DEBUG - printk(KERN_WARNING "pmem: mmap size [%lu] does not match" - "size of backing region [%lu].\n", vma_size, - pmem_len(id, data)); -#endif - ret = -EINVAL; - goto error; - } - - vma->vm_pgoff = pmem_start_addr(id, data) >> PAGE_SHIFT; - vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_page_prot); - - if (data->flags & PMEM_FLAGS_CONNECTED) { - struct pmem_region_node *region_node; - struct list_head *elt; - if (pmem_map_garbage(id, vma, data, 0, vma_size)) { - printk("pmem: mmap failed in kernel!\n"); - ret = -EAGAIN; - goto error; - } - list_for_each(elt, &data->region_list) { - region_node = list_entry(elt, struct pmem_region_node, - list); - DLOG("remapping file: %p %lx %lx\n", file, - region_node->region.offset, - region_node->region.len); - if (pmem_remap_pfn_range(id, vma, data, - region_node->region.offset, - region_node->region.len)) { - ret = -EAGAIN; - goto error; - } - } - data->flags |= PMEM_FLAGS_SUBMAP; - get_task_struct(current->group_leader); - data->task = current->group_leader; - data->vma = vma; -#if PMEM_DEBUG - data->pid = current->pid; -#endif - DLOG("submmapped file %p vma %p pid %u\n", file, vma, - current->pid); - } else { - if (pmem_map_pfn_range(id, vma, data, 0, vma_size)) { - printk(KERN_INFO "pmem: mmap failed in kernel!\n"); - ret = -EAGAIN; - goto error; - } - data->flags |= PMEM_FLAGS_MASTERMAP; - data->pid = current->pid; - } - vma->vm_ops = &vm_ops; -error: - up_write(&data->sem); - return ret; -} - -/* the following are the api for accessing pmem regions by other drivers - * from inside the kernel */ -int get_pmem_user_addr(struct file *file, unsigned long *start, - unsigned long *len) -{ - struct pmem_data *data; - if (!is_pmem_file(file) || !has_allocation(file)) { -#if PMEM_DEBUG - printk(KERN_INFO "pmem: requested pmem data from invalid" - "file.\n"); -#endif - return -1; - } - data = (struct pmem_data *)file->private_data; - down_read(&data->sem); - if (data->vma) { - *start = data->vma->vm_start; - *len = data->vma->vm_end - data->vma->vm_start; - } else { - *start = 0; - *len = 0; - } - up_read(&data->sem); - return 0; -} - -int get_pmem_addr(struct file *file, unsigned long *start, - unsigned long *vstart, unsigned long *len) -{ - struct pmem_data *data; - int id; - - if (!is_pmem_file(file) || !has_allocation(file)) { - return -1; - } - - data = (struct pmem_data *)file->private_data; - if (data->index == -1) { -#if PMEM_DEBUG - printk(KERN_INFO "pmem: requested pmem data from file with no " - "allocation.\n"); - return -1; -#endif - } - id = get_id(file); - - down_read(&data->sem); - 
*start = pmem_start_addr(id, data); - *len = pmem_len(id, data); - *vstart = (unsigned long)pmem_start_vaddr(id, data); - up_read(&data->sem); -#if PMEM_DEBUG - down_write(&data->sem); - data->ref++; - up_write(&data->sem); -#endif - return 0; -} - -int get_pmem_file(int fd, unsigned long *start, unsigned long *vstart, - unsigned long *len, struct file **filp) -{ - struct file *file; - - file = fget(fd); - if (unlikely(file == NULL)) { - printk(KERN_INFO "pmem: requested data from file descriptor " - "that doesn't exist."); - return -1; - } - - if (get_pmem_addr(file, start, vstart, len)) - goto end; - - if (filp) - *filp = file; - return 0; -end: - fput(file); - return -1; -} - -void put_pmem_file(struct file *file) -{ - struct pmem_data *data; - int id; - - if (!is_pmem_file(file)) - return; - id = get_id(file); - data = (struct pmem_data *)file->private_data; -#if PMEM_DEBUG - down_write(&data->sem); - if (data->ref == 0) { - printk("pmem: pmem_put > pmem_get %s (pid %d)\n", - pmem[id].dev.name, data->pid); - BUG(); - } - data->ref--; - up_write(&data->sem); -#endif - fput(file); -} - -void flush_pmem_file(struct file *file, unsigned long offset, unsigned long len) -{ - struct pmem_data *data; - int id; - void *vaddr; - struct pmem_region_node *region_node; - struct list_head *elt; - void *flush_start, *flush_end; - - if (!is_pmem_file(file) || !has_allocation(file)) { - return; - } - - id = get_id(file); - data = (struct pmem_data *)file->private_data; - if (!pmem[id].cached || file->f_flags & O_SYNC) - return; - - down_read(&data->sem); - vaddr = pmem_start_vaddr(id, data); - /* if this isn't a submmapped file, flush the whole thing */ - if (unlikely(!(data->flags & PMEM_FLAGS_CONNECTED))) { - dmac_flush_range(vaddr, vaddr + pmem_len(id, data)); - goto end; - } - /* otherwise, flush the region of the file we are drawing */ - list_for_each(elt, &data->region_list) { - region_node = list_entry(elt, struct pmem_region_node, list); - if ((offset >= region_node->region.offset) && - ((offset + len) <= (region_node->region.offset + - region_node->region.len))) { - flush_start = vaddr + region_node->region.offset; - flush_end = flush_start + region_node->region.len; - dmac_flush_range(flush_start, flush_end); - break; - } - } -end: - up_read(&data->sem); -} - -static int pmem_connect(unsigned long connect, struct file *file) -{ - struct pmem_data *data = (struct pmem_data *)file->private_data; - struct pmem_data *src_data; - struct file *src_file; - int ret = 0, put_needed; - - down_write(&data->sem); - /* retrieve the src file and check it is a pmem file with an alloc */ - src_file = fget_light(connect, &put_needed); - DLOG("connect %p to %p\n", file, src_file); - if (!src_file) { - printk("pmem: src file not found!\n"); - ret = -EINVAL; - goto err_no_file; - } - if (unlikely(!is_pmem_file(src_file) || !has_allocation(src_file))) { - printk(KERN_INFO "pmem: src file is not a pmem file or has no " - "alloc!\n"); - ret = -EINVAL; - goto err_bad_file; - } - src_data = (struct pmem_data *)src_file->private_data; - - if (has_allocation(file) && (data->index != src_data->index)) { - printk("pmem: file is already mapped but doesn't match this" - " src_file!\n"); - ret = -EINVAL; - goto err_bad_file; - } - data->index = src_data->index; - data->flags |= PMEM_FLAGS_CONNECTED; - data->master_fd = connect; - data->master_file = src_file; - -err_bad_file: - fput_light(src_file, put_needed); -err_no_file: - up_write(&data->sem); - return ret; -} - -static void pmem_unlock_data_and_mm(struct pmem_data 
*data, - struct mm_struct *mm) -{ - up_write(&data->sem); - if (mm != NULL) { - up_write(&mm->mmap_sem); - mmput(mm); - } -} - -static int pmem_lock_data_and_mm(struct file *file, struct pmem_data *data, - struct mm_struct **locked_mm) -{ - int ret = 0; - struct mm_struct *mm = NULL; - *locked_mm = NULL; -lock_mm: - down_read(&data->sem); - if (PMEM_IS_SUBMAP(data)) { - mm = get_task_mm(data->task); - if (!mm) { -#if PMEM_DEBUG - printk("pmem: can't remap task is gone!\n"); -#endif - up_read(&data->sem); - return -1; - } - } - up_read(&data->sem); - - if (mm) - down_write(&mm->mmap_sem); - - down_write(&data->sem); - /* check that the file didn't get mmaped before we could take the - * data sem, this should be safe b/c you can only submap each file - * once */ - if (PMEM_IS_SUBMAP(data) && !mm) { - pmem_unlock_data_and_mm(data, mm); - up_write(&data->sem); - goto lock_mm; - } - /* now check that vma.mm is still there, it could have been - * deleted by vma_close before we could get the data->sem */ - if ((data->flags & PMEM_FLAGS_UNSUBMAP) && (mm != NULL)) { - /* might as well release this */ - if (data->flags & PMEM_FLAGS_SUBMAP) { - put_task_struct(data->task); - data->task = NULL; - /* lower the submap flag to show the mm is gone */ - data->flags &= ~(PMEM_FLAGS_SUBMAP); - } - pmem_unlock_data_and_mm(data, mm); - return -1; - } - *locked_mm = mm; - return ret; -} - -int pmem_remap(struct pmem_region *region, struct file *file, - unsigned operation) -{ - int ret; - struct pmem_region_node *region_node; - struct mm_struct *mm = NULL; - struct list_head *elt, *elt2; - int id = get_id(file); - struct pmem_data *data = (struct pmem_data *)file->private_data; - - /* pmem region must be aligned on a page boundry */ - if (unlikely(!PMEM_IS_PAGE_ALIGNED(region->offset) || - !PMEM_IS_PAGE_ALIGNED(region->len))) { -#if PMEM_DEBUG - printk("pmem: request for unaligned pmem suballocation " - "%lx %lx\n", region->offset, region->len); -#endif - return -EINVAL; - } - - /* if userspace requests a region of len 0, there's nothing to do */ - if (region->len == 0) - return 0; - - /* lock the mm and data */ - ret = pmem_lock_data_and_mm(file, data, &mm); - if (ret) - return 0; - - /* only the owner of the master file can remap the client fds - * that back in it */ - if (!is_master_owner(file)) { -#if PMEM_DEBUG - printk("pmem: remap requested from non-master process\n"); -#endif - ret = -EINVAL; - goto err; - } - - /* check that the requested range is within the src allocation */ - if (unlikely((region->offset > pmem_len(id, data)) || - (region->len > pmem_len(id, data)) || - (region->offset + region->len > pmem_len(id, data)))) { -#if PMEM_DEBUG - printk(KERN_INFO "pmem: suballoc doesn't fit in src_file!\n"); -#endif - ret = -EINVAL; - goto err; - } - - if (operation == PMEM_MAP) { - region_node = kmalloc(sizeof(struct pmem_region_node), - GFP_KERNEL); - if (!region_node) { - ret = -ENOMEM; -#if PMEM_DEBUG - printk(KERN_INFO "No space to allocate metadata!"); -#endif - goto err; - } - region_node->region = *region; - list_add(&region_node->list, &data->region_list); - } else if (operation == PMEM_UNMAP) { - int found = 0; - list_for_each_safe(elt, elt2, &data->region_list) { - region_node = list_entry(elt, struct pmem_region_node, - list); - if (region->len == 0 || - (region_node->region.offset == region->offset && - region_node->region.len == region->len)) { - list_del(elt); - kfree(region_node); - found = 1; - } - } - if (!found) { -#if PMEM_DEBUG - printk("pmem: Unmap region does not map any mapped " - 
"region!"); -#endif - ret = -EINVAL; - goto err; - } - } - - if (data->vma && PMEM_IS_SUBMAP(data)) { - if (operation == PMEM_MAP) - ret = pmem_remap_pfn_range(id, data->vma, data, - region->offset, region->len); - else if (operation == PMEM_UNMAP) - ret = pmem_unmap_pfn_range(id, data->vma, data, - region->offset, region->len); - } - -err: - pmem_unlock_data_and_mm(data, mm); - return ret; -} - -static void pmem_revoke(struct file *file, struct pmem_data *data) -{ - struct pmem_region_node *region_node; - struct list_head *elt, *elt2; - struct mm_struct *mm = NULL; - int id = get_id(file); - int ret = 0; - - data->master_file = NULL; - ret = pmem_lock_data_and_mm(file, data, &mm); - /* if lock_data_and_mm fails either the task that mapped the fd, or - * the vma that mapped it have already gone away, nothing more - * needs to be done */ - if (ret) - return; - /* unmap everything */ - /* delete the regions and region list nothing is mapped any more */ - if (data->vma) - list_for_each_safe(elt, elt2, &data->region_list) { - region_node = list_entry(elt, struct pmem_region_node, - list); - pmem_unmap_pfn_range(id, data->vma, data, - region_node->region.offset, - region_node->region.len); - list_del(elt); - kfree(region_node); - } - /* delete the master file */ - pmem_unlock_data_and_mm(data, mm); -} - -static void pmem_get_size(struct pmem_region *region, struct file *file) -{ - struct pmem_data *data = (struct pmem_data *)file->private_data; - int id = get_id(file); - - if (!has_allocation(file)) { - region->offset = 0; - region->len = 0; - return; - } else { - region->offset = pmem_start_addr(id, data); - region->len = pmem_len(id, data); - } - DLOG("offset %lx len %lx\n", region->offset, region->len); -} - - -static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct pmem_data *data; - int id = get_id(file); - - switch (cmd) { - case PMEM_GET_PHYS: - { - struct pmem_region region; - DLOG("get_phys\n"); - if (!has_allocation(file)) { - region.offset = 0; - region.len = 0; - } else { - data = (struct pmem_data *)file->private_data; - region.offset = pmem_start_addr(id, data); - region.len = pmem_len(id, data); - } - printk(KERN_INFO "pmem: request for physical address of pmem region " - "from process %d.\n", current->pid); - if (copy_to_user((void __user *)arg, ®ion, - sizeof(struct pmem_region))) - return -EFAULT; - break; - } - case PMEM_MAP: - { - struct pmem_region region; - if (copy_from_user(®ion, (void __user *)arg, - sizeof(struct pmem_region))) - return -EFAULT; - data = (struct pmem_data *)file->private_data; - return pmem_remap(®ion, file, PMEM_MAP); - } - break; - case PMEM_UNMAP: - { - struct pmem_region region; - if (copy_from_user(®ion, (void __user *)arg, - sizeof(struct pmem_region))) - return -EFAULT; - data = (struct pmem_data *)file->private_data; - return pmem_remap(®ion, file, PMEM_UNMAP); - break; - } - case PMEM_GET_SIZE: - { - struct pmem_region region; - DLOG("get_size\n"); - pmem_get_size(®ion, file); - if (copy_to_user((void __user *)arg, ®ion, - sizeof(struct pmem_region))) - return -EFAULT; - break; - } - case PMEM_GET_TOTAL_SIZE: - { - struct pmem_region region; - DLOG("get total size\n"); - region.offset = 0; - get_id(file); - region.len = pmem[id].size; - if (copy_to_user((void __user *)arg, ®ion, - sizeof(struct pmem_region))) - return -EFAULT; - break; - } - case PMEM_ALLOCATE: - { - if (has_allocation(file)) - return -EINVAL; - data = (struct pmem_data *)file->private_data; - data->index = pmem_allocate(id, arg); - break; - 
} - case PMEM_CONNECT: - DLOG("connect\n"); - return pmem_connect(arg, file); - break; - case PMEM_CACHE_FLUSH: - { - struct pmem_region region; - DLOG("flush\n"); - if (copy_from_user(&region, (void __user *)arg, - sizeof(struct pmem_region))) - return -EFAULT; - flush_pmem_file(file, region.offset, region.len); - break; - } - default: - if (pmem[id].ioctl) - return pmem[id].ioctl(file, cmd, arg); - return -EINVAL; - } - return 0; -} - -#if PMEM_DEBUG -static ssize_t debug_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -static ssize_t debug_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - struct list_head *elt, *elt2; - struct pmem_data *data; - struct pmem_region_node *region_node; - int id = (int)file->private_data; - const int debug_bufmax = 4096; - static char buffer[4096]; - int n = 0; - - DLOG("debug open\n"); - n = scnprintf(buffer, debug_bufmax, - "pid #: mapped regions (offset, len) (offset,len)...\n"); - - down(&pmem[id].data_list_sem); - list_for_each(elt, &pmem[id].data_list) { - data = list_entry(elt, struct pmem_data, list); - down_read(&data->sem); - n += scnprintf(buffer + n, debug_bufmax - n, "pid %u:", - data->pid); - list_for_each(elt2, &data->region_list) { - region_node = list_entry(elt2, struct pmem_region_node, - list); - n += scnprintf(buffer + n, debug_bufmax - n, - "(%lx,%lx) ", - region_node->region.offset, - region_node->region.len); - } - n += scnprintf(buffer + n, debug_bufmax - n, "\n"); - up_read(&data->sem); - } - up(&pmem[id].data_list_sem); - - n++; - buffer[n] = 0; - return simple_read_from_buffer(buf, count, ppos, buffer, n); -} - -static struct file_operations debug_fops = { - .read = debug_read, - .open = debug_open, -}; -#endif - -#if 0 -static struct miscdevice pmem_dev = { - .name = "pmem", - .fops = &pmem_fops, -}; -#endif - -int pmem_setup(struct android_pmem_platform_data *pdata, - long (*ioctl)(struct file *, unsigned int, unsigned long), - int (*release)(struct inode *, struct file *)) -{ - int err = 0; - int i, index = 0; - int id = id_count; - id_count++; - - pmem[id].no_allocator = pdata->no_allocator; - pmem[id].cached = pdata->cached; - pmem[id].buffered = pdata->buffered; - pmem[id].base = pdata->start; - pmem[id].size = pdata->size; - pmem[id].ioctl = ioctl; - pmem[id].release = release; - init_rwsem(&pmem[id].bitmap_sem); - init_MUTEX(&pmem[id].data_list_sem); - INIT_LIST_HEAD(&pmem[id].data_list); - pmem[id].dev.name = pdata->name; - pmem[id].dev.minor = id; - pmem[id].dev.fops = &pmem_fops; - printk(KERN_INFO "%s: %d init\n", pdata->name, pdata->cached); - - err = misc_register(&pmem[id].dev); - if (err) { - printk(KERN_ALERT "Unable to register pmem driver!\n"); - goto err_cant_register_device; - } - pmem[id].num_entries = pmem[id].size / PMEM_MIN_ALLOC; - - pmem[id].bitmap = kmalloc(pmem[id].num_entries * - sizeof(struct pmem_bits), GFP_KERNEL); - if (!pmem[id].bitmap) - goto err_no_mem_for_metadata; - - memset(pmem[id].bitmap, 0, sizeof(struct pmem_bits) * - pmem[id].num_entries); - - for (i = sizeof(pmem[id].num_entries) * 8 - 1; i >= 0; i--) { - if ((pmem[id].num_entries) & 1<name, S_IFREG | S_IRUGO, NULL, (void *)id, - &debug_fops); -#endif - return 0; -error_cant_remap: - kfree(pmem[id].bitmap); -err_no_mem_for_metadata: - misc_deregister(&pmem[id].dev); -err_cant_register_device: - return -1; -} - -static int pmem_probe(struct platform_device *pdev) -{ - struct android_pmem_platform_data *pdata; - - if (!pdev || !pdev->dev.platform_data) { - 
printk(KERN_ALERT "Unable to probe pmem!\n"); - return -1; - } - pdata = pdev->dev.platform_data; - return pmem_setup(pdata, NULL, NULL); -} - - -static int pmem_remove(struct platform_device *pdev) -{ - int id = pdev->id; - __free_page(pfn_to_page(pmem[id].garbage_pfn)); - misc_deregister(&pmem[id].dev); - return 0; -} - -static struct platform_driver pmem_driver = { - .probe = pmem_probe, - .remove = pmem_remove, - .driver = { .name = "android_pmem" } -}; - - -static int __init pmem_init(void) -{ - return platform_driver_register(&pmem_driver); -} - -static void __exit pmem_exit(void) -{ - platform_driver_unregister(&pmem_driver); -} - -module_init(pmem_init); -module_exit(pmem_exit); - -#endif diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 1e5676f1..079918eb 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -81,7 +82,6 @@ enum { BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, BINDER_DEBUG_PRIORITY_CAP = 1U << 14, BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, - BINDER_DEBUG_IOCTL = 1U << 16, }; static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; @@ -2615,9 +2615,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - binder_debug(BINDER_DEBUG_IOCTL, - "binder_ioctl begin: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg); + /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret) @@ -2736,9 +2734,6 @@ err: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) printk(KERN_INFO "binder: %d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); - binder_debug(BINDER_DEBUG_IOCTL, - "binder_ioctl end: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg); return ret; } diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 15b2e970..1a0c1391 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "logger.h" @@ -431,7 +432,10 @@ static int logger_release(struct inode *ignored, struct file *file) { if (file->f_mode & FMODE_READ) { struct logger_reader *reader = file->private_data; + struct logger_log *log = reader->log; + mutex_lock(&log->mutex); list_del(&reader->list); + mutex_unlock(&log->mutex); kfree(reader); } diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 7c65cce1..42cd93ea 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -52,19 +52,8 @@ static size_t lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; -static size_t lowmem_minfile[6] = { - 1536, - 2048, - 4096, - 5120, - 5632, - 6144 -}; -static int lowmem_minfile_size = 6; - static struct task_struct *lowmem_deathpending; - -static uint32_t lowmem_check_filepages = 0; +static DEFINE_SPINLOCK(lowmem_deathpending_lock); #define lowmem_print(level, x...) 
\ do { \ @@ -79,13 +68,25 @@ static struct notifier_block task_nb = { .notifier_call = task_notify_func, }; + +static void task_free_fn(struct work_struct *work) +{ + unsigned long flags; + + task_free_unregister(&task_nb); + spin_lock_irqsave(&lowmem_deathpending_lock, flags); + lowmem_deathpending = NULL; + spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); +} +static DECLARE_WORK(task_free_work, task_free_fn); + static int task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; + if (task == lowmem_deathpending) { - lowmem_deathpending = NULL; - task_free_unregister(&task_nb); + schedule_work(&task_free_work); } return NOTIFY_OK; } @@ -103,8 +104,7 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES); - int lru_file = global_page_state(NR_ACTIVE_FILE) + - global_page_state(NR_INACTIVE_FILE); + unsigned long flags; /* * If we already have a death outstanding, then @@ -121,14 +121,9 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_free < lowmem_minfree[i]) { - if(other_file < lowmem_minfree[i] || - (lowmem_check_filepages && - (lru_file < lowmem_minfile[i]))) { - - min_adj = lowmem_adj[i]; - break; - } + if (other_file < lowmem_minfree[i]) { + min_adj = lowmem_adj[i]; + break; } } if (nr_to_scan > 0) @@ -181,14 +176,20 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } + if (selected) { - lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); - lowmem_deathpending = selected; - task_free_register(&task_nb); - force_sig(SIGKILL, selected); - rem -= selected_tasksize; + spin_lock_irqsave(&lowmem_deathpending_lock, flags); + if (!lowmem_deathpending) { + lowmem_print(1, + "send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_adj, selected_tasksize); + lowmem_deathpending = selected; + task_free_register(&task_nb); + force_sig(SIGKILL, selected); + rem -= selected_tasksize; + } + spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); } lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); @@ -219,11 +220,6 @@ module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); -module_param_named(check_filepages , lowmem_check_filepages, uint, - S_IRUGO | S_IWUSR); -module_param_array_named(minfile, lowmem_minfile, uint, &lowmem_minfile_size, - S_IRUGO | S_IWUSR); - module_init(lowmem_init); module_exit(lowmem_exit); diff --git a/drivers/staging/android/timed_gpio.c b/drivers/staging/android/timed_gpio.c index a646107d..a64481c3 100644 --- a/drivers/staging/android/timed_gpio.c +++ b/drivers/staging/android/timed_gpio.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/android_alarm.h b/include/linux/android_alarm.h index f8f14e79..cbfeafce 100644 --- a/include/linux/android_alarm.h +++ b/include/linux/android_alarm.h @@ -74,6 +74,7 @@ ktime_t alarm_get_elapsed_realtime(void); /* set rtc while preserving elapsed realtime */ int alarm_set_rtc(const 
struct timespec ts); +void alarm_update_timedelta(struct timespec tv, struct timespec ts); #endif diff --git a/include/linux/android_pmem.h b/include/linux/android_pmem.h index 6e37c0f8..f5548820 100644 --- a/include/linux/android_pmem.h +++ b/include/linux/android_pmem.h @@ -8,12 +8,12 @@ * may be copied, distributed, and modified under those terms. * * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * */ -#ifdef CONFIG_MSM_KGSL + #ifndef _ANDROID_PMEM_H_ #define _ANDROID_PMEM_H_ @@ -45,7 +45,7 @@ */ #define PMEM_CONNECT _IOW(PMEM_IOCTL_MAGIC, 6, unsigned int) /* Returns the total size of the pmem region it is sent to as a pmem_region - * struct (with offset set to 0). + * struct (with offset set to 0). */ #define PMEM_GET_TOTAL_SIZE _IOW(PMEM_IOCTL_MAGIC, 7, unsigned int) /* Revokes gpu registers and resets the gpu. Pass a pointer to the @@ -54,6 +54,7 @@ #define HW3D_REVOKE_GPU _IOW(PMEM_IOCTL_MAGIC, 8, unsigned int) #define PMEM_CACHE_FLUSH _IOW(PMEM_IOCTL_MAGIC, 8, unsigned int) #define HW3D_GRANT_GPU _IOW(PMEM_IOCTL_MAGIC, 9, unsigned int) +#define HW3D_WAIT_FOR_INTERRUPT _IOW(PMEM_IOCTL_MAGIC, 10, unsigned int) #define PMEM_CLEAN_INV_CACHES _IOW(PMEM_IOCTL_MAGIC, 11, unsigned int) #define PMEM_CLEAN_CACHES _IOW(PMEM_IOCTL_MAGIC, 12, unsigned int) @@ -85,6 +86,8 @@ struct pmem_allocation { #ifdef __KERNEL__ int get_pmem_file(unsigned int fd, unsigned long *start, unsigned long *vstart, unsigned long *end, struct file **filp); +int get_pmem_addr(struct file *file, unsigned long *start, + unsigned long *vstart, unsigned long *len); int get_pmem_fd(int fd, unsigned long *start, unsigned long *end); int get_pmem_user_addr(struct file *file, unsigned long *start, unsigned long *end); @@ -101,6 +104,7 @@ enum pmem_allocator_type { * defined */ PMEM_ALLOCATORTYPE_BITMAP = 0, /* forced to be zero here */ + PMEM_ALLOCATORTYPE_SYSTEM, PMEM_ALLOCATORTYPE_ALLORNOTHING, PMEM_ALLOCATORTYPE_BUDDYBESTFIT, @@ -163,104 +167,7 @@ int pmem_setup(struct android_pmem_platform_data *pdata, int pmem_remap(struct pmem_region *region, struct file *file, unsigned operation); -int is_pmem_file(struct file *file); - #endif /* __KERNEL__ */ #endif //_ANDROID_PPP_H_ -#else -/* include/linux/android_pmem.h - * - * Copyright (C) 2007 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _ANDROID_PMEM_H_ -#define _ANDROID_PMEM_H_ - -#define PMEM_IOCTL_MAGIC 'p' -#define PMEM_GET_PHYS _IOW(PMEM_IOCTL_MAGIC, 1, unsigned int) -#define PMEM_MAP _IOW(PMEM_IOCTL_MAGIC, 2, unsigned int) -#define PMEM_GET_SIZE _IOW(PMEM_IOCTL_MAGIC, 3, unsigned int) -#define PMEM_UNMAP _IOW(PMEM_IOCTL_MAGIC, 4, unsigned int) -/* This ioctl will allocate pmem space, backing the file, it will fail - * if the file already has an allocation, pass it the len as the argument - * to the ioctl */ -#define PMEM_ALLOCATE _IOW(PMEM_IOCTL_MAGIC, 5, unsigned int) -/* This will connect a one pmem file to another, pass the file that is already - * backed in memory as the argument to the ioctl - */ -#define PMEM_CONNECT _IOW(PMEM_IOCTL_MAGIC, 6, unsigned int) -/* Returns the total size of the pmem region it is sent to as a pmem_region - * struct (with offset set to 0). - */ -#define PMEM_GET_TOTAL_SIZE _IOW(PMEM_IOCTL_MAGIC, 7, unsigned int) -#define PMEM_CACHE_FLUSH _IOW(PMEM_IOCTL_MAGIC, 8, unsigned int) - -struct android_pmem_platform_data -{ - const char* name; - /* starting physical address of memory region */ - unsigned long start; - /* size of memory region */ - unsigned long size; - /* set to indicate the region should not be managed with an allocator */ - unsigned no_allocator; - /* set to indicate maps of this region should be cached, if a mix of - * cached and uncached is desired, set this and open the device with - * O_SYNC to get an uncached region */ - unsigned cached; - /* The MSM7k has bits to enable a write buffer in the bus controller*/ - unsigned buffered; -}; - -struct pmem_region { - unsigned long offset; - unsigned long len; -}; - -#ifdef CONFIG_ANDROID_PMEM -int is_pmem_file(struct file *file); -int get_pmem_file(int fd, unsigned long *start, unsigned long *vstart, - unsigned long *end, struct file **filp); -int get_pmem_user_addr(struct file *file, unsigned long *start, - unsigned long *end); -void put_pmem_file(struct file* file); -void flush_pmem_file(struct file *file, unsigned long start, unsigned long len); -int pmem_setup(struct android_pmem_platform_data *pdata, - long (*ioctl)(struct file *, unsigned int, unsigned long), - int (*release)(struct inode *, struct file *)); -int pmem_remap(struct pmem_region *region, struct file *file, - unsigned operation); - -#else -static inline int is_pmem_file(struct file *file) { return 0; } -static inline int get_pmem_file(int fd, unsigned long *start, - unsigned long *vstart, unsigned long *end, - struct file **filp) { return -ENOSYS; } -static inline int get_pmem_user_addr(struct file *file, unsigned long *start, - unsigned long *end) { return -ENOSYS; } -static inline void put_pmem_file(struct file* file) { return; } -static inline void flush_pmem_file(struct file *file, unsigned long start, - unsigned long len) { return; } -static inline int pmem_setup(struct android_pmem_platform_data *pdata, - long (*ioctl)(struct file *, unsigned int, unsigned long), - int (*release)(struct inode *, struct file *)) { return -ENOSYS; } - -static inline int pmem_remap(struct pmem_region *region, struct file *file, - unsigned operation) { return -ENOSYS; } -#endif - -#endif //_ANDROID_PPP_H_ - -#endif \ No newline at end of file diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 557bdad3..76c0893d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -15,7 +15,7 @@ struct vm_area_struct; * Zone modifiers (see linux/mmzone.h - low three bits) * * Do not put any conditional on these. 
If necessary modify the definitions - * without the underscores and use the consistently. The definitions here may + * without the underscores and use them consistently. The definitions here may * be used in bit comparisons. */ #define __GFP_DMA ((__force gfp_t)0x01u) @@ -30,7 +30,8 @@ struct vm_area_struct; * _might_ fail. This depends upon the particular VM implementation. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. + * cannot handle allocation failures. This modifier is deprecated and no new + * users should be added. * * __GFP_NORETRY: The VM implementation must not retry indefinitely. * @@ -83,6 +84,7 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) +#define GFP_IOFS (__GFP_IO | __GFP_FS) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) @@ -99,7 +101,7 @@ struct vm_area_struct; __GFP_NORETRY|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) @@ -150,12 +152,12 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long * and there are 16 of them to cover all possible combinations of - * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM + * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. * But GFP_MOVABLE is not only a zone specifier but also an allocation * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. - * Only 1bit of the lowest 3 bit (DMA,DMA32,HIGHMEM) can be set to "1". + * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1". * * bit result * ================= @@ -185,7 +187,7 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) #define GFP_ZONE_TABLE ( \ (ZONE_NORMAL << 0 * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \ + | (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) \ | (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT) \ | (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT) \ @@ -195,7 +197,7 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags) ) /* - * GFP_ZONE_BAD is a bitmap for all combination of __GFP_DMA, __GFP_DMA32 + * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per * entry starting with bit 0. Bit is set if the combination is not * allowed. 
@@ -318,10 +320,10 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); #define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask),0) + __get_free_pages((gfp_mask), 0) #define __get_dma_pages(gfp_mask, order) \ - __get_free_pages((gfp_mask) | GFP_DMA,(order)) + __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index 86f6b5c5..ceada785 100644 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -1,4 +1,3 @@ -#ifdef CONFIG_MSM_KGSL /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -464,355 +463,3 @@ int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, #endif #endif #endif /* _MSM_KGSL_H */ - - -#else - -/* - * (C) Copyright Advanced Micro Devices, Inc. 2002, 2007 - * Copyright (c) 2008-2009 QUALCOMM USA, INC. - * - * All source code in this file is licensed under the following license - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can find it at http://www.fsf.org - */ -#ifndef _MSM_KGSL_H -#define _MSM_KGSL_H - -/*context flags */ -#define KGSL_CONTEXT_SAVE_GMEM 1 -#define KGSL_CONTEXT_NO_GMEM_ALLOC 2 - -/* generic flag values */ -#define KGSL_FLAGS_NORMALMODE 0x00000000 -#define KGSL_FLAGS_SAFEMODE 0x00000001 -#define KGSL_FLAGS_INITIALIZED0 0x00000002 -#define KGSL_FLAGS_INITIALIZED 0x00000004 -#define KGSL_FLAGS_STARTED 0x00000008 -#define KGSL_FLAGS_ACTIVE 0x00000010 -#define KGSL_FLAGS_RESERVED0 0x00000020 -#define KGSL_FLAGS_RESERVED1 0x00000040 -#define KGSL_FLAGS_RESERVED2 0x00000080 - -/* device id */ -enum kgsl_deviceid { - KGSL_DEVICE_ANY = 0x00000000, - KGSL_DEVICE_YAMATO = 0x00000001, - KGSL_DEVICE_G12 = 0x00000002, - KGSL_DEVICE_MAX = 0x00000002 -}; - -struct kgsl_devinfo { - - unsigned int device_id; - /* chip revision id - * coreid:8 majorrev:8 minorrev:8 patch:8 - */ - unsigned int chip_id; - unsigned int mmu_enabled; - unsigned int gmem_gpubaseaddr; - /* if gmem_hostbaseaddr is NULL, we would know its not mapped into - * mmio space */ - unsigned int gmem_hostbaseaddr; - unsigned int gmem_sizebytes; -}; - -/* this structure defines the region of memory that can be mmap()ed from this - driver. 
The timestamp fields are volatile because they are written by the - GPU -*/ -struct kgsl_devmemstore { - volatile unsigned int soptimestamp; - unsigned int sbz; - volatile unsigned int eoptimestamp; - unsigned int sbz2; - volatile unsigned int ts_cmp_enable; - unsigned int sbz3; - volatile unsigned int ref_wait_ts; - unsigned int sbz4; -}; - -#define KGSL_DEVICE_MEMSTORE_OFFSET(field) \ - offsetof(struct kgsl_devmemstore, field) - - -/* timestamp id*/ -enum kgsl_timestamp_type { - KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ - KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ - KGSL_TIMESTAMP_MAX = 0x00000002, -}; - -/* property types - used with kgsl_device_getproperty */ -enum kgsl_property_type { - KGSL_PROP_DEVICE_INFO = 0x00000001, - KGSL_PROP_DEVICE_SHADOW = 0x00000002, - KGSL_PROP_DEVICE_POWER = 0x00000003, - KGSL_PROP_SHMEM = 0x00000004, - KGSL_PROP_SHMEM_APERTURES = 0x00000005, - KGSL_PROP_MMU_ENABLE = 0x00000006, - KGSL_PROP_INTERRUPT_WAITS = 0x00000007, -}; - -struct kgsl_shadowprop { - unsigned int gpuaddr; - unsigned int size; - unsigned int flags; /* contains KGSL_FLAGS_ values */ -}; - -#ifdef CONFIG_ARCH_MSM7X30 -struct kgsl_platform_data { - unsigned int high_axi_2d; - unsigned int high_axi_3d; - unsigned int max_grp2d_freq; - unsigned int min_grp2d_freq; - int (*set_grp2d_async)(void); - unsigned int max_grp3d_freq; - unsigned int min_grp3d_freq; - int (*set_grp3d_async)(void); -}; -#endif -/* ioctls */ -#define KGSL_IOC_TYPE 0x09 - -/* get misc info about the GPU - type should be a value from enum kgsl_property_type - value points to a structure that varies based on type - sizebytes is sizeof() that structure - for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo - this structure contaings hardware versioning info. - for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop - this is used to find mmap() offset and sizes for mapping - struct kgsl_memstore into userspace. -*/ -struct kgsl_device_getproperty { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int type; - void *value; - unsigned int sizebytes; -}; - -#define IOCTL_KGSL_DEVICE_GETPROPERTY \ - _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty) - - -/* read a GPU register. - offsetwords it the 32 bit word offset from the beginning of the - GPU register space. - */ -struct kgsl_device_regread { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int offsetwords; - unsigned int value; /* output param */ -}; - -#define IOCTL_KGSL_DEVICE_REGREAD \ - _IOWR(KGSL_IOC_TYPE, 0x3, struct kgsl_device_regread) - - -/* block until the GPU has executed past a given timestamp - * timeout is in milliseconds. - */ -struct kgsl_device_waittimestamp { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int timestamp; - unsigned int timeout; -}; - -#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ - _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) - - -/* issue indirect commands to the GPU. - * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE - * ibaddr and sizedwords must specify a subset of a buffer created - * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM - * flags may be a mask of KGSL_CONTEXT_ values - * timestamp is a returned counter value which can be passed to - * other ioctls to determine when the commands have been executed by - * the GPU. 
- */ -struct kgsl_ringbuffer_issueibcmds { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int drawctxt_id; - unsigned int ibaddr; - unsigned int sizedwords; - unsigned int timestamp; /*output param */ - unsigned int flags; -}; - -#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \ - _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds) - -/* read the most recently executed timestamp value - * type should be a value from enum kgsl_timestamp_type - */ -struct kgsl_cmdstream_readtimestamp { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int type; - unsigned int timestamp; /*output param */ -}; - -#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \ - _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp) - -/* free memory when the GPU reaches a given timestamp. - * gpuaddr specify a memory region created by a - * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call - * type should be a value from enum kgsl_timestamp_type - */ -struct kgsl_cmdstream_freememontimestamp { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int gpuaddr; - unsigned int type; - unsigned int timestamp; -}; - -#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ - _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) - -/* create a draw context, which is used to preserve GPU state. - * The flags field may contain a mask KGSL_CONTEXT_* values - */ -struct kgsl_drawctxt_create { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int flags; - unsigned int drawctxt_id; /*output param */ -}; - -#define IOCTL_KGSL_DRAWCTXT_CREATE \ - _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) - -/* destroy a draw context */ -struct kgsl_drawctxt_destroy { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int drawctxt_id; -}; - -#define IOCTL_KGSL_DRAWCTXT_DESTROY \ - _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) - -/* add a block of pmem or fb into the GPU address space */ -struct kgsl_sharedmem_from_pmem { - int pmem_fd; - unsigned int gpuaddr; /*output param */ - unsigned int len; - unsigned int offset; -}; - -#define IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ - _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) - -/* remove memory from the GPU's address space */ -struct kgsl_sharedmem_free { - unsigned int gpuaddr; -}; - -#define IOCTL_KGSL_SHAREDMEM_FREE \ - _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) - -struct kgsl_gmem_desc { - unsigned int x; - unsigned int y; - unsigned int width; - unsigned int height; - unsigned int pitch; -}; - -struct kgsl_buffer_desc { - void *hostptr; - unsigned int gpuaddr; - int size; - unsigned int format; - unsigned int pitch; - unsigned int enabled; -}; - -struct kgsl_bind_gmem_shadow { - unsigned int drawctxt_id; - struct kgsl_gmem_desc gmem_desc; - unsigned int shadow_x; - unsigned int shadow_y; - struct kgsl_buffer_desc shadow_buffer; - unsigned int buffer_id; -}; - -#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ - _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) - -/* add a block of memory into the GPU address space */ -struct kgsl_sharedmem_from_vmalloc { - unsigned int gpuaddr; /*output param */ - unsigned int hostptr; - /* If set from user space then will attempt to - * allocate even if low watermark is crossed */ - int force_no_low_watermark; -}; - -#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ - _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) - -#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ - _IOW(KGSL_IOC_TYPE, 0x24, struct 
kgsl_sharedmem_free) - -struct kgsl_drawctxt_set_bin_base_offset { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - unsigned int drawctxt_id; - unsigned int offset; -}; - -#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ - _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) - -enum kgsl_cmdwindow_type { - KGSL_CMDWINDOW_MIN = 0x00000000, - KGSL_CMDWINDOW_2D = 0x00000000, - KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ - KGSL_CMDWINDOW_MMU = 0x00000002, - KGSL_CMDWINDOW_ARBITER = 0x000000FF, - KGSL_CMDWINDOW_MAX = 0x000000FF, -}; - -/* write to the command window */ -struct kgsl_cmdwindow_write { -#ifdef CONFIG_ARCH_MSM7X30 - unsigned int device_id; -#endif - enum kgsl_cmdwindow_type target; - unsigned int addr; - unsigned int data; -}; - -#define IOCTL_KGSL_CMDWINDOW_WRITE \ - _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) - -#endif /* _MSM_KGSL_H */ -#endif diff --git a/mm/ashmem.c b/mm/ashmem.c index 16058090..a16f3f7c 100644 --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #define ASHMEM_NAME_PREFIX "dev/ashmem/" #define ASHMEM_NAME_PREFIX_LEN (sizeof(ASHMEM_NAME_PREFIX) - 1) @@ -45,6 +46,8 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ + unsigned long vm_start; /* Start address of vm_area + * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -178,7 +181,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = nonseekable_open(inode, file); + ret = generic_file_open(inode, file); if (unlikely(ret)) return ret; @@ -211,6 +214,75 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } +static ssize_t ashmem_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct ashmem_area *asma = file->private_data; + int ret = 0; + + mutex_lock(&ashmem_mutex); + + /* If size is not set, or set to 0, always return EOF. 
*/ + if (asma->size == 0) { + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->read(asma->file, buf, len, pos); + if (ret < 0) { + goto out; + } + + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) +{ + struct ashmem_area *asma = file->private_data; + int ret; + + mutex_lock(&ashmem_mutex); + + if (asma->size == 0) { + ret = -EINVAL; + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->llseek(asma->file, offset, origin); + if (ret < 0) { + goto out; + } + + /** Copy f_pos from backing file, since f_ops->llseek() sets it */ + file->f_pos = asma->file->f_pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static inline unsigned long +calc_vm_may_flags(unsigned long prot) +{ + return _calc_vm_trans(prot, PROT_READ, VM_MAYREAD ) | + _calc_vm_trans(prot, PROT_WRITE, VM_MAYWRITE) | + _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC); +} + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) { struct ashmem_area *asma = file->private_data; @@ -225,10 +297,12 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } /* requested protection bits must match our allowed protection mask */ - if (unlikely((vma->vm_flags & ~asma->prot_mask) & PROT_MASK)) { + if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask)) & + calc_vm_prot_bits(PROT_MASK))) { ret = -EPERM; goto out; } + vma->vm_flags &= ~calc_vm_may_flags(~asma->prot_mask); if (!asma->file) { char *name = ASHMEM_NAME_DEF; @@ -255,6 +329,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; + asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -286,11 +361,7 @@ static int ashmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask) if (!nr_to_scan) return lru_count; - /* If our mutex is held, we are recursing into ourselves, so bail out */ - if (!mutex_trylock(&ashmem_mutex)) { - return -1; - } - + mutex_lock(&ashmem_mutex); list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { struct inode *inode = range->asma->file->f_dentry->d_inode; loff_t start = range->pgstart * PAGE_SIZE; @@ -559,6 +630,69 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } +#ifdef CONFIG_OUTER_CACHE +static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) +{ + unsigned int physaddr = 0; + pgd_t *pgd_ptr = NULL; + pmd_t *pmd_ptr = NULL; + pte_t *pte_ptr = NULL, pte; + + spin_lock(&current->mm->page_table_lock); + pgd_ptr = pgd_offset(current->mm, virtaddr); + if (pgd_none(*pgd_ptr) || pgd_bad(*pgd_ptr)) { + pr_err("Failed to convert virtaddr %x to pgd_ptr\n", + virtaddr); + goto done; + } + + pmd_ptr = pmd_offset(pgd_ptr, virtaddr); + if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { + pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", + (void *)pgd_ptr); + goto done; + } + + pte_ptr = pte_offset_map(pmd_ptr, virtaddr); + if (!pte_ptr) { + pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", + (void *)pmd_ptr); + goto done; + } + pte = *pte_ptr; + physaddr = pte_pfn(pte); + pte_unmap(pte_ptr); +done: + spin_unlock(&current->mm->page_table_lock); + physaddr <<= PAGE_SHIFT; + return physaddr; +} +#endif + +static int ashmem_cache_op(struct ashmem_area *asma, + void (*cache_func)(unsigned long vstart, unsigned long length, + 
unsigned long pstart)) +{ +#ifdef CONFIG_OUTER_CACHE + unsigned long vaddr; +#endif + mutex_lock(&ashmem_mutex); +#ifndef CONFIG_OUTER_CACHE + cache_func(asma->vm_start, asma->size, 0); +#else + for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; + vaddr += PAGE_SIZE) { + unsigned long physaddr; + physaddr = virtaddr_to_physaddr(vaddr); + if (!physaddr) + return -EINVAL; + cache_func(vaddr, PAGE_SIZE, physaddr); + } +#endif + mutex_unlock(&ashmem_mutex); + return 0; +} + static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -599,6 +733,15 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink(&ashmem_shrinker, ret, GFP_KERNEL); } break; + case ASHMEM_CACHE_FLUSH_RANGE: + ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); + break; + case ASHMEM_CACHE_CLEAN_RANGE: + ret = ashmem_cache_op(asma, &clean_caches); + break; + case ASHMEM_CACHE_INV_RANGE: + ret = ashmem_cache_op(asma, &invalidate_caches); + break; } return ret; @@ -661,6 +804,8 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, + .read = ashmem_read, + .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, From 14509b73e854ae48d17aa2eee12befacf0d6b830 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 13 May 2012 03:45:08 +0800 Subject: [PATCH 002/155] msm: htcleo: cleanup --- arch/arm/mach-msm/board-htcleo-wifi-nvs.c | 3 +-- arch/arm/mach-msm/board-htcleo.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c index 6b007131..562b94b1 100644 --- a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c +++ b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c @@ -81,7 +81,7 @@ unsigned char *get_wifi_nvs_ram( void ) } EXPORT_SYMBOL(get_wifi_nvs_ram); -static int parse_tag_msm_wifi(void) +static void parse_tag_msm_wifi(void) { uint32_t id1, id2, sid1, sid2, sid3; uint32_t id_base = 0xef260; @@ -102,7 +102,6 @@ static int parse_tag_msm_wifi(void) sprintf(nvs_mac_addr, "macaddr=00:23:76:%02x:%02x:%02x\n", sid1, sid2, sid3); pr_info("Device WiFi MAC Address: %s\n", nvs_mac_addr); - return 0; } static unsigned wifi_get_nvs_size( void ) diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c index 6c34747e..0b21f220 100644 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -541,7 +541,7 @@ static char bdaddr[BDADDR_STR_SIZE]; module_param_string(bdaddr, bdaddr, sizeof(bdaddr), 0400); MODULE_PARM_DESC(bdaddr, "bluetooth address"); -static int parse_tag_bdaddr(void) +static void parse_tag_bdaddr(void) { uint32_t id1, id2, sid1, sid2, sid3; uint32_t id_base = 0xef260; @@ -562,7 +562,6 @@ static int parse_tag_bdaddr(void) sprintf(bdaddr, "00:23:76:%02x:%02x:%02x", sid3, sid2, sid1); pr_info("Device Bluetooth MAC Address: %s\n", bdaddr); - return 0; } /* end AOSP style interface */ From ac0378e146d8cae35e63728db7d4d8931784a0f5 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 13 May 2012 11:49:13 +0800 Subject: [PATCH 003/155] htcleo: updated htcleo_defconfig to tytung_HWA_r2.2-uniMAC --- arch/arm/configs/htcleo_defconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index d85958f2..53792bfb 100644 --- a/arch/arm/configs/htcleo_defconfig +++ 
b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sat May 5 00:18:22 CST 2012 +# Sun May 13 11:35:40 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r2" +CONFIG_LOCALVERSION="_tytung_HWA_r2.2-uniMAC" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y @@ -1681,7 +1681,7 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_XVMALLOC=y CONFIG_ZRAM=y -CONFIG_ZRAM_NUM_DEVICES=3 +CONFIG_ZRAM_NUM_DEVICES=1 CONFIG_ZRAM_DEFAULT_PERCENTAGE=18 # CONFIG_ZRAM_DEBUG is not set CONFIG_ZRAM_DEFAULT_DISKSIZE=100000000 From 1e3f6a926621453ecc62f7e85f637995c4447145 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 13 May 2012 20:34:11 +0800 Subject: [PATCH 004/155] updated README. --- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 5d014817..e10c6c2d 100644 --- a/README +++ b/README @@ -23,8 +23,8 @@ Primary features: - Native USB Tethering (for Gingerbread) (Credits: tytung) - Native Wi-Fi Tethering (Credits: tytung) - Real Wi-Fi MAC address (only for SD build on WinMo 6.5) (Credits: savan and tytung) -- Unique Wi-Fi MAC address (for MAGLDR and cLK) (Credits: markinus) -- Unique Bluetooth MAC address (Credits: markinus and tytung) +- Unique Wi-Fi MAC address (for MAGLDR and cLK) (Credits: Franck78 and markinus) +- Unique Bluetooth MAC address (Credits: Franck78, markinus and tytung) - Official HTC extended battery support (HTC EB 2300mAh) (Credits: arne) - ALSA sound driver as kernel modules (alsa-pcm-htc-leo.ko and alsa-mix-htc-leo.ko) (Credits: cotulla) - Wired headphones support for ICS. (Credits: zivan56) @@ -32,7 +32,7 @@ Primary features: - Improved Flashlight compatibility for ICS. (Credits: tytung) - Backported the GPU driver to enable the Hardware Acceleration for ICS. (Credits: Securecrt and Rick_1995) -Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, other devs, and testers. +Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, Franck78, other devs, and testers. 
=============================================================================== From 8afb87a6ea9a4fe7f8351ee810b69c2620e0fd71 Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Mon, 14 May 2012 01:49:10 +0530 Subject: [PATCH 005/155] [KGSL] update to msm-kgsl3d0 v3.8 --- arch/arm/mach-msm/include/mach/memory.h | 28 +- arch/arm/mach-msm/memory.c | 68 +- arch/arm/mach-msm/nand_partitions.c | 67 +- drivers/gpu/msm/Kconfig | 18 +- drivers/gpu/msm/Makefile | 8 +- drivers/gpu/msm/a200_reg.h | 448 ------- drivers/gpu/msm/a220_reg.h | 39 - drivers/gpu/msm/adreno.c | 575 ++++---- drivers/gpu/msm/adreno.h | 128 +- drivers/gpu/msm/adreno_debugfs.c | 13 +- drivers/gpu/msm/adreno_debugfs.h | 30 +- drivers/gpu/msm/adreno_drawctxt.c | 1631 ++--------------------- drivers/gpu/msm/adreno_drawctxt.h | 100 +- drivers/gpu/msm/adreno_pm4types.h | 168 +-- drivers/gpu/msm/adreno_postmortem.c | 139 +- drivers/gpu/msm/adreno_postmortem.h | 30 +- drivers/gpu/msm/adreno_ringbuffer.c | 226 +--- drivers/gpu/msm/adreno_ringbuffer.h | 38 +- drivers/gpu/msm/kgsl.c | 482 +++++-- drivers/gpu/msm/kgsl.h | 161 +-- drivers/gpu/msm/kgsl_cffdump.c | 12 +- drivers/gpu/msm/kgsl_cffdump.h | 30 +- drivers/gpu/msm/kgsl_debugfs.c | 1 + drivers/gpu/msm/kgsl_device.h | 206 ++- drivers/gpu/msm/kgsl_drm.c | 1 - drivers/gpu/msm/kgsl_log.h | 30 +- drivers/gpu/msm/kgsl_mmu.c | 989 ++++---------- drivers/gpu/msm/kgsl_mmu.h | 310 ++--- drivers/gpu/msm/kgsl_pwrctrl.c | 226 ++-- drivers/gpu/msm/kgsl_pwrctrl.h | 57 +- drivers/gpu/msm/kgsl_pwrscale.c | 82 +- drivers/gpu/msm/kgsl_pwrscale.h | 36 +- drivers/gpu/msm/kgsl_sharedmem.c | 287 ++-- drivers/gpu/msm/kgsl_sharedmem.h | 103 +- drivers/gpu/msm/z180.c | 436 +++--- drivers/gpu/msm/z180.h | 32 +- drivers/gpu/msm/z180_reg.h | 58 +- drivers/misc/Makefile | 10 +- drivers/staging/android/binder.c | 817 +++++------- drivers/staging/android/ram_console.c | 4 +- include/linux/memory_alloc.h | 2 +- include/linux/msm_kgsl.h | 32 +- include/linux/msm_q6vdec.h | 9 + include/linux/pm_qos_params.h | 3 + lib/Makefile | 6 +- mm/ashmem.c | 152 +-- 46 files changed, 2817 insertions(+), 5511 deletions(-) delete mode 100644 drivers/gpu/msm/a200_reg.h delete mode 100644 drivers/gpu/msm/a220_reg.h diff --git a/arch/arm/mach-msm/include/mach/memory.h b/arch/arm/mach-msm/include/mach/memory.h index 03aafb89..4e47f900 100644 --- a/arch/arm/mach-msm/include/mach/memory.h +++ b/arch/arm/mach-msm/include/mach/memory.h @@ -1,6 +1,7 @@ /* arch/arm/mach-msm/include/mach/memory.h * * Copyright (C) 2007 Google, Inc. + * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -12,7 +13,6 @@ * GNU General Public License for more details. 
* */ - #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H @@ -37,28 +37,41 @@ #define PHYS_OFFSET UL(0x10000000) #endif +#define MAX_PHYSMEM_BITS 32 +#define SECTION_SIZE_BITS 25 + #define HAS_ARCH_IO_REMAP_PFN_RANGE #define CONSISTENT_DMA_SIZE (4*SZ_1M) #ifndef __ASSEMBLY__ void *alloc_bootmem_aligned(unsigned long size, unsigned long alignment); +unsigned long allocate_contiguous_ebi_nomap(unsigned long, unsigned long); void clean_and_invalidate_caches(unsigned long, unsigned long, unsigned long); void clean_caches(unsigned long, unsigned long, unsigned long); void invalidate_caches(unsigned long, unsigned long, unsigned long); +int platform_physical_remove_pages(unsigned long, unsigned long); +int platform_physical_add_pages(unsigned long, unsigned long); +int platform_physical_low_power_pages(unsigned long, unsigned long); #ifdef CONFIG_ARCH_MSM_ARM11 void write_to_strongly_ordered_memory(void); +void map_zero_page_strongly_ordered(void); + #include -#if defined(CONFIG_ARCH_MSM7227) +#ifdef CONFIG_ARCH_MSM7X27 #define arch_barrier_extra() do \ { \ write_to_strongly_ordered_memory(); \ } while (0) #else -#define arch_barrier_extra() do {} while (0) +#define arch_barrier_extra() do \ + { if (machine_is_msm7x27_surf() || machine_is_msm7x27_ffa()) \ + write_to_strongly_ordered_memory(); \ + } while (0) +#endif #endif #ifdef CONFIG_CACHE_L2X0 @@ -67,12 +80,17 @@ extern void l2x0_cache_flush_all(void); #define finish_arch_switch(prev) do { l2x0_cache_sync(); } while (0) #endif -#endif #endif #ifdef CONFIG_ARCH_MSM_SCORPION -#define arch_has_speculative_dfetch() 1 +#define arch_has_speculative_dfetch() 1 #endif #endif +/* these correspond to values known by the modem */ +#define MEMORY_DEEP_POWERDOWN 0 +#define MEMORY_SELF_REFRESH 1 +#define MEMORY_ACTIVE 2 + +#define NPA_MEMORY_NODE_NAME "/mem/ebi1/cs1" diff --git a/arch/arm/mach-msm/memory.c b/arch/arm/mach-msm/memory.c index 111ddec5..7eabdc78 100644 --- a/arch/arm/mach-msm/memory.c +++ b/arch/arm/mach-msm/memory.c @@ -16,10 +16,19 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include +#if defined(CONFIG_MSM_NPA_REMOTE) +#include "npa_remote.h" +#include +#include +#endif int arch_io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) @@ -34,7 +43,7 @@ int arch_io_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, void *zero_page_strongly_ordered; -static void map_zero_page_strongly_ordered(void) +void map_zero_page_strongly_ordered(void) { if (zero_page_strongly_ordered) return; @@ -43,12 +52,15 @@ static void map_zero_page_strongly_ordered(void) ioremap_strongly_ordered(page_to_pfn(empty_zero_page) << PAGE_SHIFT, PAGE_SIZE); } +EXPORT_SYMBOL(map_zero_page_strongly_ordered); void write_to_strongly_ordered_memory(void) { map_zero_page_strongly_ordered(); *(int *)zero_page_strongly_ordered = 0; } +EXPORT_SYMBOL(write_to_strongly_ordered_memory); + void flush_axi_bus_buffer(void) { __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ @@ -109,3 +121,57 @@ void invalidate_caches(unsigned long vstart, flush_axi_bus_buffer(); } + +void *alloc_bootmem_aligned(unsigned long size, unsigned long alignment) +{ + void *unused_addr = NULL; + unsigned long addr, tmp_size, unused_size; + + /* Allocate maximum size needed, see where it ends up. 
+ * Then free it -- in this path there are no other allocators + * so we can depend on getting the same address back + * when we allocate a smaller piece that is aligned + * at the end (if necessary) and the piece we really want, + * then free the unused first piece. + */ + + tmp_size = size + alignment - PAGE_SIZE; + addr = (unsigned long)alloc_bootmem(tmp_size); + free_bootmem(__pa(addr), tmp_size); + + unused_size = alignment - (addr % alignment); + if (unused_size) + unused_addr = alloc_bootmem(unused_size); + + addr = (unsigned long)alloc_bootmem(size); + if (unused_size) + free_bootmem(__pa(unused_addr), unused_size); + + return (void *)addr; +} + +int platform_physical_remove_pages(unsigned long start_pfn, + unsigned long nr_pages) +{ + return 0; +} + +int platform_physical_add_pages(unsigned long start_pfn, + unsigned long nr_pages) +{ + return 0; +} + +int platform_physical_low_power_pages(unsigned long start_pfn, + unsigned long nr_pages) +{ + return 0; +} + +unsigned long allocate_contiguous_ebi_nomap(unsigned long size, + unsigned long align) +{ + return _allocate_contiguous_memory_nomap(size, MEMTYPE_EBI0, + align, __builtin_return_address(0)); +} +EXPORT_SYMBOL(allocate_contiguous_ebi_nomap); diff --git a/arch/arm/mach-msm/nand_partitions.c b/arch/arm/mach-msm/nand_partitions.c index 89610b25..421a557c 100644 --- a/arch/arm/mach-msm/nand_partitions.c +++ b/arch/arm/mach-msm/nand_partitions.c @@ -4,6 +4,7 @@ * bootloader. * * Copyright (C) 2007 Google, Inc. + * Copyright (c) 2008-2009, Code Aurora Forum. All rights reserved. * Author: Brian Swetland * * This software is licensed under the terms of the GNU General Public @@ -22,7 +23,7 @@ #include #include -#include +#include #include @@ -38,47 +39,26 @@ #define ATAG_MSM_PARTITION 0x4d534D70 /* MSMp */ -struct msm_ptbl_entry -{ +struct msm_ptbl_entry { char name[16]; __u32 offset; __u32 size; __u32 flags; }; -#define MSM_MAX_PARTITIONS 11 +#define MSM_MAX_PARTITIONS 8 static struct mtd_partition msm_nand_partitions[MSM_MAX_PARTITIONS]; static char msm_nand_names[MSM_MAX_PARTITIONS * 16]; extern struct flash_platform_data msm_nand_data; -int emmc_partition_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct mtd_partition *ptn = msm_nand_partitions; - char *p = page; - int i; - uint64_t offset; - uint64_t size; - - p += sprintf(p, "dev: size erasesize name\n"); - for (i = 0; i < MSM_MAX_PARTITIONS && ptn->name; i++, ptn++) { - offset = ptn->offset; - size = ptn->size; - p += sprintf(p, "mmcblk0p%llu: %08llx %08x \"%s\"\n", offset, size * 512, 512, ptn->name); - } - - return p - page; -} - static int __init parse_tag_msm_partition(const struct tag *tag) { struct mtd_partition *ptn = msm_nand_partitions; char *name = msm_nand_names; struct msm_ptbl_entry *entry = (void *) &tag->u; unsigned count, n; - unsigned have_kpanic = 0; count = (tag->hdr.size - 2) / (sizeof(struct msm_ptbl_entry) / sizeof(__u32)); @@ -90,9 +70,6 @@ static int __init parse_tag_msm_partition(const struct tag *tag) memcpy(name, entry->name, 15); name[15] = 0; - if (!strcmp(name, "kpanic")) - have_kpanic = 1; - ptn->name = name; ptn->offset = entry->offset; ptn->size = entry->size; @@ -102,42 +79,6 @@ static int __init parse_tag_msm_partition(const struct tag *tag) ptn++; } -#ifdef CONFIG_VIRTUAL_KPANIC_PARTITION - if (!have_kpanic) { - int i; - uint64_t kpanic_off = 0; - - if (count == MSM_MAX_PARTITIONS) { - printk("Cannot create virtual 'kpanic' partition\n"); - goto out; - } - - for (i = 0; i < count; i++) { - ptn = 
&msm_nand_partitions[i]; - if (!strcmp(ptn->name, CONFIG_VIRTUAL_KPANIC_SRC)) { - ptn->size -= CONFIG_VIRTUAL_KPANIC_PSIZE; - kpanic_off = ptn->offset + ptn->size; - break; - } - } - if (i == count) { - printk(KERN_ERR "Partition %s not found\n", - CONFIG_VIRTUAL_KPANIC_SRC); - goto out; - } - - ptn = &msm_nand_partitions[count]; - ptn->name ="kpanic"; - ptn->offset = kpanic_off; - ptn->size = CONFIG_VIRTUAL_KPANIC_PSIZE; - - printk("Virtual mtd partition '%s' created @%llx (%llu)\n", - ptn->name, ptn->offset, ptn->size); - - count++; - } -out: -#endif /* CONFIG_VIRTUAL_KPANIC_SRC */ msm_nand_data.nr_parts = count; msm_nand_data.parts = msm_nand_partitions; diff --git a/drivers/gpu/msm/Kconfig b/drivers/gpu/msm/Kconfig index 64cbc304..5852e269 100644 --- a/drivers/gpu/msm/Kconfig +++ b/drivers/gpu/msm/Kconfig @@ -64,22 +64,30 @@ config MSM_KGSL_DRM bool "Build a DRM interface for the MSM_KGSL driver" depends on MSM_KGSL && DRM -config MSM_KGSL_MMU +config MSM_KGSL_GPUMMU bool "Enable the GPU MMU in the MSM_KGSL driver" - depends on MSM_KGSL && MMU && !MSM_KGSL_CFF_DUMP + depends on MSM_KGSL && !MSM_KGSL_CFF_DUMP + default y + +config MSM_KGSL_IOMMU + bool "Enable the use of IOMMU in the MSM_KGSL driver" + depends on MSM_KGSL && MSM_IOMMU && !MSM_KGSL_GPUMMU && !MSM_KGSL_CFF_DUMP + +config MSM_KGSL_MMU + bool + depends on MSM_KGSL_GPUMMU || MSM_KGSL_IOMMU default y config KGSL_PER_PROCESS_PAGE_TABLE bool "Enable Per Process page tables for the KGSL driver" default n - depends on MSM_KGSL_MMU && !MSM_KGSL_DRM + depends on MSM_KGSL_GPUMMU && !MSM_KGSL_DRM ---help--- The MMU will use per process pagetables when enabled. config MSM_KGSL_PAGE_TABLE_SIZE hex "Size of pagetables" default 0xFFF0000 - depends on MSM_KGSL_MMU ---help--- Sets the pagetable size used by the MMU. The max value is 0xFFF0000 or (256M - 64K). @@ -97,7 +105,7 @@ config MSM_KGSL_PAGE_TABLE_COUNT config MSM_KGSL_MMU_PAGE_FAULT bool "Force the GPU MMU to page fault for unmapped regions" default y - depends on MSM_KGSL_MMU + depends on MSM_KGSL_GPUMMU config MSM_KGSL_DISABLE_SHADOW_WRITES bool "Disable register shadow writes for context switches" diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile index c905bfec..f49e7164 100644 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -4,17 +4,21 @@ msm_kgsl_core-y = \ kgsl.o \ kgsl_sharedmem.o \ kgsl_pwrctrl.o \ - kgsl_pwrscale.o + kgsl_pwrscale.o \ + kgsl_mmu.o \ + kgsl_gpummu.o msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o -msm_kgsl_core-$(CONFIG_MSM_KGSL_MMU) += kgsl_mmu.o msm_kgsl_core-$(CONFIG_MSM_KGSL_CFF_DUMP) += kgsl_cffdump.o msm_kgsl_core-$(CONFIG_MSM_KGSL_DRM) += kgsl_drm.o +msm_kgsl_core-$(CONFIG_MSM_SCM) += kgsl_pwrscale_trustzone.o +msm_kgsl_core-$(CONFIG_MSM_SLEEP_STATS) += kgsl_pwrscale_idlestats.o msm_adreno-y += \ adreno_ringbuffer.o \ adreno_drawctxt.o \ adreno_postmortem.o \ + adreno_a2xx.o \ adreno.o msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o diff --git a/drivers/gpu/msm/a200_reg.h b/drivers/gpu/msm/a200_reg.h deleted file mode 100644 index 4df6e14c..00000000 --- a/drivers/gpu/msm/a200_reg.h +++ /dev/null @@ -1,448 +0,0 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#ifndef __A200_REG_H -#define __A200_REG_H - -enum VGT_EVENT_TYPE { - VS_DEALLOC = 0, - PS_DEALLOC = 1, - VS_DONE_TS = 2, - PS_DONE_TS = 3, - CACHE_FLUSH_TS = 4, - CONTEXT_DONE = 5, - CACHE_FLUSH = 6, - VIZQUERY_START = 7, - VIZQUERY_END = 8, - SC_WAIT_WC = 9, - RST_PIX_CNT = 13, - RST_VTX_CNT = 14, - TILE_FLUSH = 15, - CACHE_FLUSH_AND_INV_TS_EVENT = 20, - ZPASS_DONE = 21, - CACHE_FLUSH_AND_INV_EVENT = 22, - PERFCOUNTER_START = 23, - PERFCOUNTER_STOP = 24, - VS_FETCH_DONE = 27, - FACENESS_FLUSH = 28, -}; - -enum COLORFORMATX { - COLORX_4_4_4_4 = 0, - COLORX_1_5_5_5 = 1, - COLORX_5_6_5 = 2, - COLORX_8 = 3, - COLORX_8_8 = 4, - COLORX_8_8_8_8 = 5, - COLORX_S8_8_8_8 = 6, - COLORX_16_FLOAT = 7, - COLORX_16_16_FLOAT = 8, - COLORX_16_16_16_16_FLOAT = 9, - COLORX_32_FLOAT = 10, - COLORX_32_32_FLOAT = 11, - COLORX_32_32_32_32_FLOAT = 12, - COLORX_2_3_3 = 13, - COLORX_8_8_8 = 14, -}; - -enum SURFACEFORMAT { - FMT_1_REVERSE = 0, - FMT_1 = 1, - FMT_8 = 2, - FMT_1_5_5_5 = 3, - FMT_5_6_5 = 4, - FMT_6_5_5 = 5, - FMT_8_8_8_8 = 6, - FMT_2_10_10_10 = 7, - FMT_8_A = 8, - FMT_8_B = 9, - FMT_8_8 = 10, - FMT_Cr_Y1_Cb_Y0 = 11, - FMT_Y1_Cr_Y0_Cb = 12, - FMT_5_5_5_1 = 13, - FMT_8_8_8_8_A = 14, - FMT_4_4_4_4 = 15, - FMT_10_11_11 = 16, - FMT_11_11_10 = 17, - FMT_DXT1 = 18, - FMT_DXT2_3 = 19, - FMT_DXT4_5 = 20, - FMT_24_8 = 22, - FMT_24_8_FLOAT = 23, - FMT_16 = 24, - FMT_16_16 = 25, - FMT_16_16_16_16 = 26, - FMT_16_EXPAND = 27, - FMT_16_16_EXPAND = 28, - FMT_16_16_16_16_EXPAND = 29, - FMT_16_FLOAT = 30, - FMT_16_16_FLOAT = 31, - FMT_16_16_16_16_FLOAT = 32, - FMT_32 = 33, - FMT_32_32 = 34, - FMT_32_32_32_32 = 35, - FMT_32_FLOAT = 36, - FMT_32_32_FLOAT = 37, - FMT_32_32_32_32_FLOAT = 38, - FMT_32_AS_8 = 39, - FMT_32_AS_8_8 = 40, - FMT_16_MPEG = 41, - FMT_16_16_MPEG = 42, - FMT_8_INTERLACED = 43, - FMT_32_AS_8_INTERLACED = 44, - FMT_32_AS_8_8_INTERLACED = 45, - FMT_16_INTERLACED = 46, - FMT_16_MPEG_INTERLACED = 47, - FMT_16_16_MPEG_INTERLACED = 48, - FMT_DXN = 49, - FMT_8_8_8_8_AS_16_16_16_16 = 50, - FMT_DXT1_AS_16_16_16_16 = 51, - FMT_DXT2_3_AS_16_16_16_16 = 52, - FMT_DXT4_5_AS_16_16_16_16 = 53, - FMT_2_10_10_10_AS_16_16_16_16 = 54, - FMT_10_11_11_AS_16_16_16_16 = 55, - FMT_11_11_10_AS_16_16_16_16 = 56, - FMT_32_32_32_FLOAT = 57, - FMT_DXT3A = 58, - FMT_DXT5A = 59, - FMT_CTX1 = 60, - FMT_DXT3A_AS_1_1_1_1 = 61 -}; - -#define 
REG_PERF_MODE_CNT 0x0 -#define REG_PERF_STATE_RESET 0x0 -#define REG_PERF_STATE_ENABLE 0x1 -#define REG_PERF_STATE_FREEZE 0x2 - -#define RB_EDRAM_INFO_EDRAM_SIZE_SIZE 4 -#define RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE 2 -#define RB_EDRAM_INFO_UNUSED0_SIZE 8 -#define RB_EDRAM_INFO_EDRAM_RANGE_SIZE 18 - -struct rb_edram_info_t { - unsigned int edram_size:RB_EDRAM_INFO_EDRAM_SIZE_SIZE; - unsigned int edram_mapping_mode:RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE; - unsigned int unused0:RB_EDRAM_INFO_UNUSED0_SIZE; - unsigned int edram_range:RB_EDRAM_INFO_EDRAM_RANGE_SIZE; -}; - -union reg_rb_edram_info { - unsigned int val; - struct rb_edram_info_t f; -}; - -#define RBBM_READ_ERROR_UNUSED0_SIZE 2 -#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 -#define RBBM_READ_ERROR_UNUSED1_SIZE 13 -#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 -#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 - -struct rbbm_read_error_t { - unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; - unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; - unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; - unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; - unsigned int read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; -}; - -union rbbm_read_error_u { - unsigned int val:32; - struct rbbm_read_error_t f; -}; - -#define CP_RB_CNTL_RB_BUFSZ_SIZE 6 -#define CP_RB_CNTL_UNUSED0_SIZE 2 -#define CP_RB_CNTL_RB_BLKSZ_SIZE 6 -#define CP_RB_CNTL_UNUSED1_SIZE 2 -#define CP_RB_CNTL_BUF_SWAP_SIZE 2 -#define CP_RB_CNTL_UNUSED2_SIZE 2 -#define CP_RB_CNTL_RB_POLL_EN_SIZE 1 -#define CP_RB_CNTL_UNUSED3_SIZE 6 -#define CP_RB_CNTL_RB_NO_UPDATE_SIZE 1 -#define CP_RB_CNTL_UNUSED4_SIZE 3 -#define CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE 1 - -struct cp_rb_cntl_t { - unsigned int rb_bufsz:CP_RB_CNTL_RB_BUFSZ_SIZE; - unsigned int unused0:CP_RB_CNTL_UNUSED0_SIZE; - unsigned int rb_blksz:CP_RB_CNTL_RB_BLKSZ_SIZE; - unsigned int unused1:CP_RB_CNTL_UNUSED1_SIZE; - unsigned int buf_swap:CP_RB_CNTL_BUF_SWAP_SIZE; - unsigned int unused2:CP_RB_CNTL_UNUSED2_SIZE; - unsigned int rb_poll_en:CP_RB_CNTL_RB_POLL_EN_SIZE; - unsigned int unused3:CP_RB_CNTL_UNUSED3_SIZE; - unsigned int rb_no_update:CP_RB_CNTL_RB_NO_UPDATE_SIZE; - unsigned int unused4:CP_RB_CNTL_UNUSED4_SIZE; - unsigned int rb_rptr_wr_ena:CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE; -}; - -union reg_cp_rb_cntl { - unsigned int val:32; - struct cp_rb_cntl_t f; -}; - -#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL -#define RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT 0x00000004 - - -#define SQ_INT_CNTL__PS_WATCHDOG_MASK 0x00000001L -#define SQ_INT_CNTL__VS_WATCHDOG_MASK 0x00000002L - -#define RBBM_INT_CNTL__RDERR_INT_MASK 0x00000001L -#define RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK 0x00000002L -#define RBBM_INT_CNTL__GUI_IDLE_INT_MASK 0x00080000L - -#define RBBM_STATUS__CMDFIFO_AVAIL_MASK 0x0000001fL -#define RBBM_STATUS__TC_BUSY_MASK 0x00000020L -#define RBBM_STATUS__HIRQ_PENDING_MASK 0x00000100L -#define RBBM_STATUS__CPRQ_PENDING_MASK 0x00000200L -#define RBBM_STATUS__CFRQ_PENDING_MASK 0x00000400L -#define RBBM_STATUS__PFRQ_PENDING_MASK 0x00000800L -#define RBBM_STATUS__VGT_BUSY_NO_DMA_MASK 0x00001000L -#define RBBM_STATUS__RBBM_WU_BUSY_MASK 0x00004000L -#define RBBM_STATUS__CP_NRT_BUSY_MASK 0x00010000L -#define RBBM_STATUS__MH_BUSY_MASK 0x00040000L -#define RBBM_STATUS__MH_COHERENCY_BUSY_MASK 0x00080000L -#define RBBM_STATUS__SX_BUSY_MASK 0x00200000L -#define RBBM_STATUS__TPC_BUSY_MASK 0x00400000L -#define RBBM_STATUS__SC_CNTX_BUSY_MASK 0x01000000L -#define RBBM_STATUS__PA_BUSY_MASK 0x02000000L -#define 
RBBM_STATUS__VGT_BUSY_MASK 0x04000000L -#define RBBM_STATUS__SQ_CNTX17_BUSY_MASK 0x08000000L -#define RBBM_STATUS__SQ_CNTX0_BUSY_MASK 0x10000000L -#define RBBM_STATUS__RB_CNTX_BUSY_MASK 0x40000000L -#define RBBM_STATUS__GUI_ACTIVE_MASK 0x80000000L - -#define CP_INT_CNTL__SW_INT_MASK 0x00080000L -#define CP_INT_CNTL__T0_PACKET_IN_IB_MASK 0x00800000L -#define CP_INT_CNTL__OPCODE_ERROR_MASK 0x01000000L -#define CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK 0x02000000L -#define CP_INT_CNTL__RESERVED_BIT_ERROR_MASK 0x04000000L -#define CP_INT_CNTL__IB_ERROR_MASK 0x08000000L -#define CP_INT_CNTL__IB2_INT_MASK 0x20000000L -#define CP_INT_CNTL__IB1_INT_MASK 0x40000000L -#define CP_INT_CNTL__RB_INT_MASK 0x80000000L - -#define MASTER_INT_SIGNAL__MH_INT_STAT 0x00000020L -#define MASTER_INT_SIGNAL__SQ_INT_STAT 0x04000000L -#define MASTER_INT_SIGNAL__CP_INT_STAT 0x40000000L -#define MASTER_INT_SIGNAL__RBBM_INT_STAT 0x80000000L - -#define RB_EDRAM_INFO__EDRAM_SIZE_MASK 0x0000000fL -#define RB_EDRAM_INFO__EDRAM_RANGE_MASK 0xffffc000L - -#define MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT 0x00000006 -#define MH_ARBITER_CONFIG__L1_ARB_ENABLE__SHIFT 0x00000007 -#define MH_ARBITER_CONFIG__L1_ARB_HOLD_ENABLE__SHIFT 0x00000008 -#define MH_ARBITER_CONFIG__L2_ARB_CONTROL__SHIFT 0x00000009 -#define MH_ARBITER_CONFIG__PAGE_SIZE__SHIFT 0x0000000a -#define MH_ARBITER_CONFIG__TC_REORDER_ENABLE__SHIFT 0x0000000d -#define MH_ARBITER_CONFIG__TC_ARB_HOLD_ENABLE__SHIFT 0x0000000e -#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT_ENABLE__SHIFT 0x0000000f -#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT__SHIFT 0x00000010 -#define MH_ARBITER_CONFIG__CP_CLNT_ENABLE__SHIFT 0x00000016 -#define MH_ARBITER_CONFIG__VGT_CLNT_ENABLE__SHIFT 0x00000017 -#define MH_ARBITER_CONFIG__TC_CLNT_ENABLE__SHIFT 0x00000018 -#define MH_ARBITER_CONFIG__RB_CLNT_ENABLE__SHIFT 0x00000019 -#define MH_ARBITER_CONFIG__PA_CLNT_ENABLE__SHIFT 0x0000001a - -#define MH_MMU_CONFIG__RB_W_CLNT_BEHAVIOR__SHIFT 0x00000004 -#define MH_MMU_CONFIG__CP_W_CLNT_BEHAVIOR__SHIFT 0x00000006 -#define MH_MMU_CONFIG__CP_R0_CLNT_BEHAVIOR__SHIFT 0x00000008 -#define MH_MMU_CONFIG__CP_R1_CLNT_BEHAVIOR__SHIFT 0x0000000a -#define MH_MMU_CONFIG__CP_R2_CLNT_BEHAVIOR__SHIFT 0x0000000c -#define MH_MMU_CONFIG__CP_R3_CLNT_BEHAVIOR__SHIFT 0x0000000e -#define MH_MMU_CONFIG__CP_R4_CLNT_BEHAVIOR__SHIFT 0x00000010 -#define MH_MMU_CONFIG__VGT_R0_CLNT_BEHAVIOR__SHIFT 0x00000012 -#define MH_MMU_CONFIG__VGT_R1_CLNT_BEHAVIOR__SHIFT 0x00000014 -#define MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT 0x00000016 -#define MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT 0x00000018 - -#define CP_RB_CNTL__RB_BUFSZ__SHIFT 0x00000000 -#define CP_RB_CNTL__RB_BLKSZ__SHIFT 0x00000008 -#define CP_RB_CNTL__RB_POLL_EN__SHIFT 0x00000014 -#define CP_RB_CNTL__RB_NO_UPDATE__SHIFT 0x0000001b - -#define RB_COLOR_INFO__COLOR_FORMAT__SHIFT 0x00000000 -#define RB_EDRAM_INFO__EDRAM_MAPPING_MODE__SHIFT 0x00000004 -#define RB_EDRAM_INFO__EDRAM_RANGE__SHIFT 0x0000000e - -#define REG_CP_CSQ_IB1_STAT 0x01FE -#define REG_CP_CSQ_IB2_STAT 0x01FF -#define REG_CP_CSQ_RB_STAT 0x01FD -#define REG_CP_DEBUG 0x01FC -#define REG_CP_IB1_BASE 0x0458 -#define REG_CP_IB1_BUFSZ 0x0459 -#define REG_CP_IB2_BASE 0x045A -#define REG_CP_IB2_BUFSZ 0x045B -#define REG_CP_INT_ACK 0x01F4 -#define REG_CP_INT_CNTL 0x01F2 -#define REG_CP_INT_STATUS 0x01F3 -#define REG_CP_ME_CNTL 0x01F6 -#define REG_CP_ME_RAM_DATA 0x01FA -#define REG_CP_ME_RAM_WADDR 0x01F8 -#define REG_CP_ME_STATUS 0x01F7 -#define REG_CP_PFP_UCODE_ADDR 0x00C0 -#define REG_CP_PFP_UCODE_DATA 0x00C1 -#define 
REG_CP_QUEUE_THRESHOLDS 0x01D5 -#define REG_CP_RB_BASE 0x01C0 -#define REG_CP_RB_CNTL 0x01C1 -#define REG_CP_RB_RPTR 0x01C4 -#define REG_CP_RB_RPTR_ADDR 0x01C3 -#define REG_CP_RB_RPTR_WR 0x01C7 -#define REG_CP_RB_WPTR 0x01C5 -#define REG_CP_RB_WPTR_BASE 0x01C8 -#define REG_CP_RB_WPTR_DELAY 0x01C6 -#define REG_CP_STAT 0x047F -#define REG_CP_STATE_DEBUG_DATA 0x01ED -#define REG_CP_STATE_DEBUG_INDEX 0x01EC -#define REG_CP_ST_BASE 0x044D -#define REG_CP_ST_BUFSZ 0x044E - -#define REG_CP_PERFMON_CNTL 0x0444 -#define REG_CP_PERFCOUNTER_SELECT 0x0445 -#define REG_CP_PERFCOUNTER_LO 0x0446 -#define REG_CP_PERFCOUNTER_HI 0x0447 - -#define REG_RBBM_PERFCOUNTER1_SELECT 0x0395 -#define REG_RBBM_PERFCOUNTER1_HI 0x0398 -#define REG_RBBM_PERFCOUNTER1_LO 0x0397 - -#define REG_MASTER_INT_SIGNAL 0x03B7 - -#define REG_MH_ARBITER_CONFIG 0x0A40 -#define REG_MH_INTERRUPT_CLEAR 0x0A44 -#define REG_MH_INTERRUPT_MASK 0x0A42 -#define REG_MH_INTERRUPT_STATUS 0x0A43 -#define REG_MH_MMU_CONFIG 0x0040 -#define REG_MH_MMU_INVALIDATE 0x0045 -#define REG_MH_MMU_MPU_BASE 0x0046 -#define REG_MH_MMU_MPU_END 0x0047 -#define REG_MH_MMU_PAGE_FAULT 0x0043 -#define REG_MH_MMU_PT_BASE 0x0042 -#define REG_MH_MMU_TRAN_ERROR 0x0044 -#define REG_MH_MMU_VA_RANGE 0x0041 -#define REG_MH_CLNT_INTF_CTRL_CONFIG1 0x0A54 -#define REG_MH_CLNT_INTF_CTRL_CONFIG2 0x0A55 - -#define REG_PA_CL_VPORT_XSCALE 0x210F -#define REG_PA_CL_VPORT_ZOFFSET 0x2114 -#define REG_PA_CL_VPORT_ZSCALE 0x2113 -#define REG_PA_CL_CLIP_CNTL 0x2204 -#define REG_PA_CL_VTE_CNTL 0x2206 -#define REG_PA_SC_AA_MASK 0x2312 -#define REG_PA_SC_LINE_CNTL 0x2300 -#define REG_PA_SC_SCREEN_SCISSOR_BR 0x200F -#define REG_PA_SC_SCREEN_SCISSOR_TL 0x200E -#define REG_PA_SC_VIZ_QUERY 0x2293 -#define REG_PA_SC_VIZ_QUERY_STATUS 0x0C44 -#define REG_PA_SC_WINDOW_OFFSET 0x2080 -#define REG_PA_SC_WINDOW_SCISSOR_BR 0x2082 -#define REG_PA_SC_WINDOW_SCISSOR_TL 0x2081 -#define REG_PA_SU_FACE_DATA 0x0C86 -#define REG_PA_SU_POINT_SIZE 0x2280 -#define REG_PA_SU_LINE_CNTL 0x2282 -#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET 0x2383 -#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE 0x2380 -#define REG_PA_SU_SC_MODE_CNTL 0x2205 - -#define REG_PC_INDEX_OFFSET 0x2102 - -#define REG_RBBM_CNTL 0x003B -#define REG_RBBM_INT_ACK 0x03B6 -#define REG_RBBM_INT_CNTL 0x03B4 -#define REG_RBBM_INT_STATUS 0x03B5 -#define REG_RBBM_PATCH_RELEASE 0x0001 -#define REG_RBBM_PERIPHID1 0x03F9 -#define REG_RBBM_PERIPHID2 0x03FA -#define REG_RBBM_DEBUG 0x039B -#define REG_RBBM_DEBUG_OUT 0x03A0 -#define REG_RBBM_DEBUG_CNTL 0x03A1 -#define REG_RBBM_PM_OVERRIDE1 0x039C -#define REG_RBBM_PM_OVERRIDE2 0x039D -#define REG_RBBM_READ_ERROR 0x03B3 -#define REG_RBBM_SOFT_RESET 0x003C -#define REG_RBBM_STATUS 0x05D0 - -#define REG_RB_COLORCONTROL 0x2202 -#define REG_RB_COLOR_DEST_MASK 0x2326 -#define REG_RB_COLOR_MASK 0x2104 -#define REG_RB_COPY_CONTROL 0x2318 -#define REG_RB_DEPTHCONTROL 0x2200 -#define REG_RB_EDRAM_INFO 0x0F02 -#define REG_RB_MODECONTROL 0x2208 -#define REG_RB_SURFACE_INFO 0x2000 -#define REG_RB_SAMPLE_POS 0x220a - -#define REG_SCRATCH_ADDR 0x01DD -#define REG_SCRATCH_REG0 0x0578 -#define REG_SCRATCH_REG2 0x057A -#define REG_SCRATCH_UMSK 0x01DC - -#define REG_SQ_CF_BOOLEANS 0x4900 -#define REG_SQ_CF_LOOP 0x4908 -#define REG_SQ_GPR_MANAGEMENT 0x0D00 -#define REG_SQ_INST_STORE_MANAGMENT 0x0D02 -#define REG_SQ_INT_ACK 0x0D36 -#define REG_SQ_INT_CNTL 0x0D34 -#define REG_SQ_INT_STATUS 0x0D35 -#define REG_SQ_PROGRAM_CNTL 0x2180 -#define REG_SQ_PS_PROGRAM 0x21F6 -#define REG_SQ_VS_PROGRAM 0x21F7 -#define REG_SQ_WRAPPING_0 0x2183 -#define 
REG_SQ_WRAPPING_1 0x2184 - -#define REG_VGT_ENHANCE 0x2294 -#define REG_VGT_INDX_OFFSET 0x2102 -#define REG_VGT_MAX_VTX_INDX 0x2100 -#define REG_VGT_MIN_VTX_INDX 0x2101 - -#define REG_TP0_CHICKEN 0x0E1E -#define REG_TC_CNTL_STATUS 0x0E00 -#define REG_PA_SC_AA_CONFIG 0x2301 -#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL 0x2316 -#define REG_SQ_INTERPOLATOR_CNTL 0x2182 -#define REG_RB_DEPTH_INFO 0x2002 -#define REG_COHER_DEST_BASE_0 0x2006 -#define REG_RB_FOG_COLOR 0x2109 -#define REG_RB_STENCILREFMASK_BF 0x210C -#define REG_PA_SC_LINE_STIPPLE 0x2283 -#define REG_SQ_PS_CONST 0x2308 -#define REG_RB_DEPTH_CLEAR 0x231D -#define REG_RB_SAMPLE_COUNT_CTL 0x2324 -#define REG_SQ_CONSTANT_0 0x4000 -#define REG_SQ_FETCH_0 0x4800 - -#define REG_MH_AXI_ERROR 0xA45 -#define REG_MH_DEBUG_CTRL 0xA4E -#define REG_MH_DEBUG_DATA 0xA4F -#define REG_COHER_BASE_PM4 0xA2A -#define REG_COHER_STATUS_PM4 0xA2B -#define REG_COHER_SIZE_PM4 0xA29 - -#endif /* __A200_REG_H */ diff --git a/drivers/gpu/msm/a220_reg.h b/drivers/gpu/msm/a220_reg.h deleted file mode 100644 index 9542a9ba..00000000 --- a/drivers/gpu/msm/a220_reg.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ -#ifndef __A205_REG_H -#define __A205_REG_H - -#define REG_LEIA_PC_INDX_OFFSET REG_VGT_INDX_OFFSET -#define REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL REG_VGT_VERTEX_REUSE_BLOCK_CNTL -#define REG_LEIA_PC_MAX_VTX_INDX REG_VGT_MAX_VTX_INDX -#define REG_LEIA_GRAS_CONTROL 0x2210 -#define REG_LEIA_VSC_BIN_SIZE 0x0C01 -#define REG_LEIA_VSC_PIPE_DATA_LENGTH_7 0x0C1D - -#endif /*__A205_REG_H */ diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index db3d9782..61f14a4a 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -19,21 +19,26 @@ #include "kgsl.h" #include "kgsl_pwrscale.h" #include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_debugfs.h" #include "adreno_postmortem.h" -#include "a200_reg.h" +#include "a2xx_reg.h" +#include "kgsl_mmu.h" + +#define cpu_is_msm7x01() 0 +#define cpu_is_msm7x30() 0 +#define cpu_is_qsd8x50() 1 +#define cpu_is_msm8x60() 0 +#define cpu_is_msm8960() 0 +#define cpu_is_msm8930() 0 #define DRIVER_VERSION_MAJOR 3 #define DRIVER_VERSION_MINOR 1 -#define GSL_RBBM_INT_MASK \ - (RBBM_INT_CNTL__RDERR_INT_MASK | \ - RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK) - /* Adreno MH arbiter config*/ #define ADRENO_CFG_MHARB \ (0x10 \ @@ -66,8 +71,7 @@ | (MMU_CONFIG << MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT) \ | (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT)) -/* max msecs to wait for gpu to finish its operation(s) */ -#define MAX_WAITGPU_SECS (HZ + HZ/2) +static const struct kgsl_functable adreno_functable; static struct adreno_device device_3d0 = { .dev = { @@ -75,29 +79,24 @@ static struct adreno_device device_3d0 = { .id = KGSL_DEVICE_3D0, .ver_major = DRIVER_VERSION_MAJOR, .ver_minor = DRIVER_VERSION_MINOR, - .mmu = { - .config = ADRENO_MMU_CONFIG, + .mh = { + .mharb = ADRENO_CFG_MHARB, + /* Remove 1k boundary check in z470 to avoid a GPU + * hang. Notice that this solution won't work if + * both EBI and SMI are used + */ + .mh_intf_cfg1 = 0x00032f07, /* turn off memory protection unit by setting acceptable physical address range to include all pages. */ .mpu_base = 0x00000000, .mpu_range = 0xFFFFF000, - .reg = { - .config = REG_MH_MMU_CONFIG, - .mpu_base = REG_MH_MMU_MPU_BASE, - .mpu_end = REG_MH_MMU_MPU_END, - .va_range = REG_MH_MMU_VA_RANGE, - .pt_page = REG_MH_MMU_PT_BASE, - .page_fault = REG_MH_MMU_PAGE_FAULT, - .tran_error = REG_MH_MMU_TRAN_ERROR, - .invalidate = REG_MH_MMU_INVALIDATE, - .interrupt_mask = REG_MH_INTERRUPT_MASK, - .interrupt_status = REG_MH_INTERRUPT_STATUS, - .interrupt_clear = REG_MH_INTERRUPT_CLEAR, - .axi_error = REG_MH_AXI_ERROR, - }, + }, + .mmu = { + .config = ADRENO_MMU_CONFIG, }, .pwrctrl = { + .pwr_rail = PWR_RAIL_GRP_CLK, .regulator_name = "fs_gfx3d", .irq_name = KGSL_3D0_IRQ, .src_clk_name = "grp_src_clk", @@ -106,6 +105,14 @@ static struct adreno_device device_3d0 = { .state = KGSL_STATE_INIT, .active_cnt = 0, .iomemname = KGSL_3D0_REG_MEMORY, + .ftbl = &adreno_functable, +#ifdef CONFIG_HAS_EARLYSUSPEND + .display_off = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = kgsl_early_suspend_driver, + .resume = kgsl_late_resume_driver, + }, +#endif }, .gmemspace = { .gpu_base = 0, @@ -113,12 +120,39 @@ static struct adreno_device device_3d0 = { }, .pfp_fw = NULL, .pm4_fw = NULL, - .mharb = ADRENO_CFG_MHARB, }; -static void __devinit adreno_getfunctable(struct kgsl_functable *ftbl); +/* + * This is the master list of all GPU cores that are supported by this + * driver. 
+ */ -static int adreno_gmeminit(struct adreno_device *adreno_dev) +#define ANY_ID (~0) + +static const struct { + enum adreno_gpurev gpurev; + unsigned int core, major, minor, patchid; + const char *pm4fw; + const char *pfpfw; + struct adreno_gpudev *gpudev; +} adreno_gpulist[] = { + { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + { ADRENO_REV_A205, 0, 1, 0, ANY_ID, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, + "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev }, + /* + * patchlevel 5 (8960v2) needs special pm4 firmware to work around + * a hardware problem. + */ + { ADRENO_REV_A225, 2, 2, 0, 5, + "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, +}; + +static void adreno_gmeminit(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; union reg_rb_edram_info rb_edram_info; @@ -137,90 +171,27 @@ static int adreno_gmeminit(struct adreno_device *adreno_dev) rb_edram_info.val = 0; rb_edram_info.f.edram_size = edram_value; - if (!adreno_is_a220(adreno_dev)) - rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ + rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ /* must be aligned to size */ rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); - - return 0; } -static int adreno_gmemclose(struct kgsl_device *device) +static irqreturn_t adreno_isr(int irq, void *data) { - adreno_regwrite(device, REG_RB_EDRAM_INFO, 0x00000000); + irqreturn_t result; + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - return 0; -} - -static void adreno_rbbm_intrcallback(struct kgsl_device *device) -{ - unsigned int status = 0; - unsigned int rderr = 0; - - adreno_regread_isr(device, REG_RBBM_INT_STATUS, &status); - - if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { - union rbbm_read_error_u rerr; - adreno_regread_isr(device, REG_RBBM_READ_ERROR, &rderr); - rerr.val = rderr; - if (rerr.f.read_address == REG_CP_INT_STATUS && - rerr.f.read_error && - rerr.f.read_requester) - KGSL_DRV_WARN(device, - "rbbm read error interrupt: %08x\n", rderr); - else - KGSL_DRV_CRIT(device, - "rbbm read error interrupt: %08x\n", rderr); - } else if (status & RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK) { - KGSL_DRV_INFO(device, "rbbm display update interrupt\n"); - } else if (status & RBBM_INT_CNTL__GUI_IDLE_INT_MASK) { - KGSL_DRV_INFO(device, "rbbm gui idle interrupt\n"); - } else { - KGSL_CMD_WARN(device, - "bad bits in REG_CP_INT_STATUS %08x\n", status); - } - - status &= GSL_RBBM_INT_MASK; - adreno_regwrite_isr(device, REG_RBBM_INT_ACK, status); -} - -irqreturn_t adreno_isr(int irq, void *data) -{ - irqreturn_t result = IRQ_NONE; - struct kgsl_device *device; - unsigned int status; - - device = (struct kgsl_device *) data; - - BUG_ON(device == NULL); - BUG_ON(device->regspace.sizebytes == 0); - BUG_ON(device->regspace.mmio_virt_base == 0); - - adreno_regread_isr(device, REG_MASTER_INT_SIGNAL, &status); - - if (status & MASTER_INT_SIGNAL__MH_INT_STAT) { - kgsl_mh_intrcallback(device); - result = IRQ_HANDLED; - } - - if (status & MASTER_INT_SIGNAL__CP_INT_STAT) { - kgsl_cp_intrcallback(device); - result = IRQ_HANDLED; - } - - if (status & MASTER_INT_SIGNAL__RBBM_INT_STAT) { - adreno_rbbm_intrcallback(device); - result = IRQ_HANDLED; - } + result = 
adreno_dev->gpudev->irq_handler(adreno_dev); if (device->requested_state == KGSL_STATE_NONE) { if (device->pwrctrl.nap_allowed == true) { device->requested_state = KGSL_STATE_NAP; queue_work(device->work_queue, &device->idle_check_ws); - } else if (device->pwrctrl.idle_pass == true) { + } else if (device->pwrscale.policy != NULL) { queue_work(device->work_queue, &device->idle_check_ws); } } @@ -231,7 +202,7 @@ irqreturn_t adreno_isr(int irq, void *data) return result; } -static int adreno_cleanup_pt(struct kgsl_device *device, +static void adreno_cleanup_pt(struct kgsl_device *device, struct kgsl_pagetable *pagetable) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); @@ -243,9 +214,7 @@ static int adreno_cleanup_pt(struct kgsl_device *device, kgsl_mmu_unmap(pagetable, &device->memstore); - kgsl_mmu_unmap(pagetable, &device->mmu.dummyspace); - - return 0; + kgsl_mmu_unmap(pagetable, &device->mmu.setstate_memory); } static int adreno_setup_pt(struct kgsl_device *device, @@ -255,12 +224,6 @@ static int adreno_setup_pt(struct kgsl_device *device, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - BUG_ON(rb->buffer_desc.physaddr == 0); - BUG_ON(rb->memptrs_desc.physaddr == 0); - BUG_ON(device->memstore.physaddr == 0); -#ifdef CONFIG_MSM_KGSL_MMU - BUG_ON(device->mmu.dummyspace.physaddr == 0); -#endif result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc, GSL_PT_PAGE_RV); if (result) @@ -276,7 +239,7 @@ static int adreno_setup_pt(struct kgsl_device *device, if (result) goto unmap_memptrs_desc; - result = kgsl_mmu_map_global(pagetable, &device->mmu.dummyspace, + result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); if (result) goto unmap_memstore_desc; @@ -296,7 +259,8 @@ error: return result; } -static int adreno_setstate(struct kgsl_device *device, uint32_t flags) +static void adreno_setstate(struct kgsl_device *device, + uint32_t flags) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); unsigned int link[32]; @@ -304,38 +268,36 @@ static int adreno_setstate(struct kgsl_device *device, uint32_t flags) int sizedwords = 0; unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ -#ifndef CONFIG_MSM_KGSL_MMU - return 0; -#endif - /* if possible, set via command stream, - * otherwise set via direct register writes - */ + /* If possible, then set the state via the command stream to avoid + a CPU idle. 
Otherwise, use the default setstate which uses register + writes */ if (adreno_dev->drawctxt_active) { if (flags & KGSL_MMUFLAGS_PTUPDATE) { /* wait for graphics pipe to be idle */ - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0x00000000; /* set page table base */ - *cmds++ = pm4_type0_packet(REG_MH_MMU_PT_BASE, 1); - *cmds++ = device->mmu.hwpagetable->base.gpuaddr; + *cmds++ = cp_type0_packet(MH_MMU_PT_BASE, 1); + *cmds++ = kgsl_pt_get_base_addr( + device->mmu.hwpagetable); sizedwords += 4; } if (flags & KGSL_MMUFLAGS_TLBFLUSH) { if (!(flags & KGSL_MMUFLAGS_PTUPDATE)) { - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0x00000000; sizedwords += 2; } - *cmds++ = pm4_type0_packet(REG_MH_MMU_INVALIDATE, 1); + *cmds++ = cp_type0_packet(MH_MMU_INVALIDATE, 1); *cmds++ = mh_mmu_invalidate; sizedwords += 2; } if (flags & KGSL_MMUFLAGS_PTUPDATE && - !adreno_is_a220(adreno_dev)) { + adreno_is_a20x(adreno_dev)) { /* HW workaround: to resolve MMU page fault interrupts * caused by the VGT.It prevents the CP PFP from filling * the VGT DMA request fifo too early,thereby ensuring @@ -348,34 +310,36 @@ static int adreno_setstate(struct kgsl_device *device, uint32_t flags) * VGT DMA request fifo and prevent any further * vertex/bin updates from occurring until the wait * has finished. */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = (0x4 << 16) | (REG_PA_SU_SC_MODE_CNTL - 0x2000); *cmds++ = 0; /* disable faceness generation */ - *cmds++ = pm4_type3_packet(PM4_SET_BIN_BASE_OFFSET, 1); - *cmds++ = device->mmu.dummyspace.gpuaddr; - *cmds++ = pm4_type3_packet(PM4_DRAW_INDX_BIN, 6); + *cmds++ = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); + *cmds++ = device->mmu.setstate_memory.gpuaddr; + *cmds++ = cp_type3_packet(CP_DRAW_INDX_BIN, 6); *cmds++ = 0; /* viz query info */ *cmds++ = 0x0003C004; /* draw indicator */ *cmds++ = 0; /* bin base */ *cmds++ = 3; /* bin size */ - *cmds++ = device->mmu.dummyspace.gpuaddr; /* dma base */ + *cmds++ = + device->mmu.setstate_memory.gpuaddr; /* dma base */ *cmds++ = 6; /* dma size */ - *cmds++ = pm4_type3_packet(PM4_DRAW_INDX_BIN, 6); + *cmds++ = cp_type3_packet(CP_DRAW_INDX_BIN, 6); *cmds++ = 0; /* viz query info */ *cmds++ = 0x0003C004; /* draw indicator */ *cmds++ = 0; /* bin base */ *cmds++ = 3; /* bin size */ /* dma base */ - *cmds++ = device->mmu.dummyspace.gpuaddr; + *cmds++ = device->mmu.setstate_memory.gpuaddr; *cmds++ = 6; /* dma size */ - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0x00000000; sizedwords += 21; } + if (flags & (KGSL_MMUFLAGS_PTUPDATE | KGSL_MMUFLAGS_TLBFLUSH)) { - *cmds++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1); + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmds++ = 0x7fff; /* invalidate all base pointers */ sizedwords += 2; } @@ -383,25 +347,13 @@ static int adreno_setstate(struct kgsl_device *device, uint32_t flags) adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, &link[0], sizedwords); } else { - if (flags & KGSL_MMUFLAGS_PTUPDATE) { - adreno_idle(device, KGSL_TIMEOUT_DEFAULT); - adreno_regwrite(device, REG_MH_MMU_PT_BASE, - device->mmu.hwpagetable->base.gpuaddr); - } - - if (flags & KGSL_MMUFLAGS_TLBFLUSH) { - adreno_regwrite(device, REG_MH_MMU_INVALIDATE, - mh_mmu_invalidate); - } + kgsl_mmu_device_setstate(device, flags); } - - return 0; } static unsigned int 
adreno_getchipid(struct kgsl_device *device) { - /* XXX: drewis edit: only for 8x50 */ unsigned int chipid = 0; unsigned int coreid, majorid, minorid, patchid, revid; @@ -409,57 +361,64 @@ adreno_getchipid(struct kgsl_device *device) adreno_regread(device, REG_RBBM_PERIPHID2, &majorid); adreno_regread(device, REG_RBBM_PATCH_RELEASE, &revid); - chipid = (coreid & 0xF) << 24; + /* + * adreno 22x gpus are indicated by coreid 2, + * but REG_RBBM_PERIPHID1 always contains 0 for this field + */ + if (cpu_is_msm8960() || cpu_is_msm8x60() || cpu_is_msm8930()) + chipid = 2 << 24; + else + chipid = (coreid & 0xF) << 24; chipid |= ((majorid >> 4) & 0xF) << 16; minorid = ((revid >> 0) & 0xFF); - patchid = 1; + patchid = ((revid >> 16) & 0xFF); + + /* 8x50 returns 0 for patch release, but it should be 1 */ + if (cpu_is_qsd8x50()) + patchid = 1; chipid |= (minorid << 8) | patchid; return chipid; } -/* all chipid fields are 8 bits wide so 256 won't occur in a real chipid */ -#define DONT_CARE 256 -static const struct { - unsigned int core; - unsigned int major; - unsigned int minor; - enum adreno_gpurev gpurev; -} gpurev_table[] = { - /* major and minor may be DONT_CARE, but core must not be */ - {0, 2, DONT_CARE, ADRENO_REV_A200}, - {0, 1, 0, ADRENO_REV_A205}, - {2, 1, DONT_CARE, ADRENO_REV_A220}, - {2, 2, DONT_CARE, ADRENO_REV_A225}, -}; - static inline bool _rev_match(unsigned int id, unsigned int entry) { - return (entry == DONT_CARE || entry == id); + return (entry == ANY_ID || entry == id); } -#undef DONT_CARE -enum adreno_gpurev adreno_get_rev(struct adreno_device *adreno_dev) +static void +adreno_identify_gpu(struct adreno_device *adreno_dev) { - enum adreno_gpurev gpurev = ADRENO_REV_UNKNOWN; - unsigned int i, core, major, minor; + unsigned int i, core, major, minor, patchid; + + adreno_dev->chip_id = adreno_getchipid(&adreno_dev->dev); + core = (adreno_dev->chip_id >> 24) & 0xff; major = (adreno_dev->chip_id >> 16) & 0xff; minor = (adreno_dev->chip_id >> 8) & 0xff; + patchid = (adreno_dev->chip_id & 0xff); - for (i = 0; i < ARRAY_SIZE(gpurev_table); i++) { - if (core == gpurev_table[i].core && - _rev_match(major, gpurev_table[i].major) && - _rev_match(minor, gpurev_table[i].minor)) { - gpurev = gpurev_table[i].gpurev; + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (core == adreno_gpulist[i].core && + _rev_match(major, adreno_gpulist[i].major) && + _rev_match(minor, adreno_gpulist[i].minor) && + _rev_match(patchid, adreno_gpulist[i].patchid)) break; - } } - return gpurev; + + if (i == ARRAY_SIZE(adreno_gpulist)) { + adreno_dev->gpurev = ADRENO_REV_UNKNOWN; + return; + } + + adreno_dev->gpurev = adreno_gpulist[i].gpurev; + adreno_dev->gpudev = adreno_gpulist[i].gpudev; + adreno_dev->pfp_fwfile = adreno_gpulist[i].pfpfw; + adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw; } static int __devinit @@ -473,9 +432,9 @@ adreno_probe(struct platform_device *pdev) adreno_dev = ADRENO_DEVICE(device); device->parentdev = &pdev->dev; - init_completion(&device->recovery_gate); + adreno_dev->wait_timeout = 10000; /* default value in milliseconds */ - adreno_getfunctable(&device->ftbl); + init_completion(&device->recovery_gate); status = adreno_ringbuffer_init(device); if (status != 0) @@ -487,6 +446,9 @@ adreno_probe(struct platform_device *pdev) adreno_debugfs_init(device); + kgsl_pwrscale_init(device); + kgsl_pwrscale_attach_policy(device, ADRENO_DEFAULT_PWRSCALE_POLICY); + device->flags &= ~KGSL_FLAGS_SOFT_RESET; return 0; @@ -505,6 +467,9 @@ static int __devexit adreno_remove(struct 
platform_device *pdev) device = (struct kgsl_device *)pdev->id_entry->driver_data; adreno_dev = ADRENO_DEVICE(device); + kgsl_pwrscale_detach_policy(device); + kgsl_pwrscale_close(device); + adreno_ringbuffer_close(&adreno_dev->ringbuffer); kgsl_device_platform_remove(device); @@ -523,23 +488,38 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) /* Power up the device */ kgsl_pwrctrl_enable(device); + /* Identify the specific GPU */ + adreno_identify_gpu(adreno_dev); + + if (adreno_dev->gpurev == ADRENO_REV_UNKNOWN) { + KGSL_DRV_ERR(device, "Unknown chip ID %x\n", + adreno_dev->chip_id); + goto error_clk_off; + } + + if (adreno_is_a20x(adreno_dev)) { + /* + * the MH_CLNT_INTF_CTRL_CONFIG registers aren't present + * on older gpus + */ + device->mh.mh_intf_cfg1 = 0; + device->mh.mh_intf_cfg2 = 0; + } + + kgsl_mh_start(device); + if (kgsl_mmu_start(device)) goto error_clk_off; - adreno_dev->chip_id = adreno_getchipid(device); - /*We need to make sure all blocks are powered up and clocked before *issuing a soft reset. The overrides will then be turned off (set to 0) */ adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); - if (adreno_dev->chip_id == CHIP_REV_251) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x000000ff); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); /* Only reset CP block if all blocks have previously been reset */ if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || - !adreno_is_a220(adreno_dev)) { + !adreno_is_a22x(adreno_dev)) { adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0xFFFFFFFF); device->flags |= KGSL_FLAGS_SOFT_RESET; } else @@ -554,44 +534,39 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); - adreno_regwrite(device, REG_MH_ARBITER_CONFIG, - adreno_dev->mharb); - - if (!adreno_is_a220(adreno_dev)) { - adreno_regwrite(device, - REG_MH_CLNT_INTF_CTRL_CONFIG1, 0x00030f27); - adreno_regwrite(device, - REG_MH_CLNT_INTF_CTRL_CONFIG2, 0x00472747); - } - - /* Remove 1k boundary check in z470 to avoid GPU hang. 
- Notice that, this solution won't work if both EBI and SMI are used */ - if (adreno_is_a220(adreno_dev)) { - adreno_regwrite(device, REG_MH_CLNT_INTF_CTRL_CONFIG1, - 0x00032f07); + if (adreno_is_a225(adreno_dev)) { + /* Enable large instruction store for A225 */ + adreno_regwrite(device, REG_SQ_FLOW_CONTROL, 0x18000000); } adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); - if (!adreno_is_a220(adreno_dev)) + if (cpu_is_msm8960() || cpu_is_msm8930()) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); + + if (!adreno_is_a22x(adreno_dev)) adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); else adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); + kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); + kgsl_sharedmem_writel(&device->memstore, KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), init_reftimestamp); - adreno_regwrite(device, REG_RBBM_DEBUG, 0x000C0000); + adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); - adreno_regwrite(device, REG_RBBM_INT_CNTL, GSL_RBBM_INT_MASK); + /* Make sure interrupts are disabled */ - /* make sure SQ interrupts are disabled */ + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); adreno_regwrite(device, REG_SQ_INT_CNTL, 0); - if (adreno_is_a220(adreno_dev)) + if (adreno_is_a22x(adreno_dev)) adreno_dev->gmemspace.sizebytes = SZ_512K; else adreno_dev->gmemspace.sizebytes = SZ_256K; @@ -608,9 +583,9 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) error_irq_off: kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + kgsl_mmu_stop(device); error_clk_off: kgsl_pwrctrl_disable(device); - kgsl_mmu_stop(device); return status; } @@ -618,19 +593,15 @@ error_clk_off: static int adreno_stop(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - del_timer(&device->idle_timer); - adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); adreno_dev->drawctxt_active = NULL; adreno_ringbuffer_stop(&adreno_dev->ringbuffer); - adreno_gmemclose(device); - kgsl_mmu_stop(device); - /* Disable the clocks before the power rail. */ kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + del_timer_sync(&device->idle_timer); /* Power down the device */ kgsl_pwrctrl_disable(device); @@ -786,7 +757,7 @@ static int adreno_getproperty(struct kgsl_device *device, devinfo.device_id = device->id+1; devinfo.chip_id = adreno_dev->chip_id; devinfo.mmu_enabled = kgsl_mmu_enabled(); - devinfo.gpu_id = adreno_get_rev(adreno_dev); + devinfo.gpu_id = adreno_dev->gpurev; devinfo.gmem_gpubaseaddr = adreno_dev->gmemspace. gpu_base; devinfo.gmem_sizebytes = adreno_dev->gmemspace. 
@@ -829,16 +800,13 @@ static int adreno_getproperty(struct kgsl_device *device, break; case KGSL_PROP_MMU_ENABLE: { -#ifdef CONFIG_MSM_KGSL_MMU - int mmuProp = 1; -#else - int mmuProp = 0; -#endif + int mmu_prop = kgsl_mmu_enabled(); + if (sizebytes != sizeof(int)) { status = -EINVAL; break; } - if (copy_to_user(value, &mmuProp, sizeof(mmuProp))) { + if (copy_to_user(value, &mmu_prop, sizeof(mmu_prop))) { status = -EFAULT; break; } @@ -872,7 +840,9 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; unsigned int rbbm_status; - unsigned long wait_time = jiffies + MAX_WAITGPU_SECS; + unsigned long wait_timeout = + msecs_to_jiffies(adreno_dev->wait_timeout); + unsigned long wait_time = jiffies + wait_timeout; kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2, 0x00000000, 0x80000000); @@ -892,7 +862,7 @@ retry: } /* now, wait for the GPU to finish its operations */ - wait_time = jiffies + MAX_WAITGPU_SECS; + wait_time = jiffies + wait_timeout; while (time_before(jiffies, wait_time)) { adreno_regread(device, REG_RBBM_STATUS, &rbbm_status); if (rbbm_status == 0x110) @@ -902,7 +872,7 @@ retry: err: KGSL_DRV_ERR(device, "spun too long waiting for RB to idle\n"); if (!adreno_dump_and_recover(device)) { - wait_time = jiffies + MAX_WAITGPU_SECS; + wait_time = jiffies + wait_timeout; goto retry; } return -ETIMEDOUT; @@ -915,6 +885,7 @@ static unsigned int adreno_isidle(struct kgsl_device *device) struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; unsigned int rbbm_status; + WARN_ON(!(rb->flags & KGSL_FLAGS_STARTED)); if (rb->flags & KGSL_FLAGS_STARTED) { /* Is the ring buffer is empty? */ GSL_RB_GET_READPTR(rb, &rb->rptr); @@ -926,40 +897,20 @@ static unsigned int adreno_isidle(struct kgsl_device *device) status = true; } } else { - KGSL_DRV_ERR(device, "ringbuffer not started\n"); - BUG(); + /* if the ringbuffer isn't started we are VERY idle */ + status = true; } return status; } - -/******************************************************************/ -/* Caller must hold the driver mutex. */ -static int adreno_resume_context(struct kgsl_device *device) -{ - int status = 0; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - if (device->pwrctrl.suspended_ctxt != NULL) { - adreno_drawctxt_switch(adreno_dev, - device->pwrctrl.suspended_ctxt, 0); - status = adreno_idle(device, 0); - - } - - return status; -} - -/******************************************************************/ /* Caller must hold the device mutex. 
*/ static int adreno_suspend_context(struct kgsl_device *device) { int status = 0; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - /* save ctxt ptr and switch to NULL ctxt */ - device->pwrctrl.suspended_ctxt = adreno_dev->drawctxt_active; - if (device->pwrctrl.suspended_ctxt != NULL) { + /* switch to NULL ctxt */ + if (adreno_dev->drawctxt_active != NULL) { adreno_drawctxt_switch(adreno_dev, NULL, 0); status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); } @@ -993,12 +944,8 @@ uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, mutex_lock(&kgsl_driver.process_mutex); list_for_each_entry(priv, &kgsl_driver.process_list, list) { - if (pt_base != 0 - && priv->pagetable - && priv->pagetable->base.gpuaddr != pt_base) { + if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base)) continue; - } - spin_lock(&priv->mem_lock); entry = kgsl_sharedmem_find_region(priv, gpuaddr, sizeof(unsigned int)); @@ -1025,42 +972,33 @@ uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, return result; } -static void _adreno_regread(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) +void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value) { unsigned int *reg; BUG_ON(offsetwords*sizeof(uint32_t) >= device->regspace.sizebytes); reg = (unsigned int *)(device->regspace.mmio_virt_base + (offsetwords << 2)); + + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + /*ensure this read finishes before the next one. * i.e. act like normal readl() */ *value = __raw_readl(reg); rmb(); } -void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, - unsigned int *value) -{ - kgsl_pre_hwaccess(device); - _adreno_regread(device, offsetwords, value); -} - -void adreno_regread_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) -{ - _adreno_regread(device, offsetwords, value); -} - -static void _adreno_regwrite(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) +void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value) { unsigned int *reg; BUG_ON(offsetwords*sizeof(uint32_t) >= device->regspace.sizebytes); + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + kgsl_cffdump_regwrite(device->id, offsetwords << 2, value); reg = (unsigned int *)(device->regspace.mmio_virt_base + (offsetwords << 2)); @@ -1071,20 +1009,6 @@ static void _adreno_regwrite(struct kgsl_device *device, __raw_writel(value, reg); } -void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, - unsigned int value) -{ - kgsl_pre_hwaccess(device); - _adreno_regwrite(device, offsetwords, value); -} - -void adreno_regwrite_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) -{ - _adreno_regwrite(device, offsetwords, value); -} - static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, unsigned int timestamp) { @@ -1102,7 +1026,7 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, kgsl_sharedmem_readl(&device->memstore, &ref_ts, KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); mb(); - if (timestamp_cmp(ref_ts, timestamp)) { + if (timestamp_cmp(ref_ts, timestamp) >= 0) { kgsl_sharedmem_writel(&device->memstore, KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), timestamp); @@ -1121,7 +1045,7 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, /* submit a dummy packet so that even if all * commands upto timestamp get executed we will still * get an interrupt */ - cmds[0] = 
pm4_type3_packet(PM4_NOP, 1); + cmds[0] = cp_type3_packet(CP_NOP, 1); cmds[1] = 0; adreno_ringbuffer_issuecmds(device, 0, &cmds[0], 2); } @@ -1132,15 +1056,18 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, } /* - wait_io_event_interruptible_timeout checks for the exit condition before + wait_event_interruptible_timeout checks for the exit condition before placing a process in wait q. For conditional interrupts we expect the process to already be in its wait q when its exit condition checking function is called. */ -#define kgsl_wait_io_event_interruptible_timeout(wq, condition, timeout)\ +#define kgsl_wait_event_interruptible_timeout(wq, condition, timeout, io)\ ({ \ long __ret = timeout; \ - __wait_io_event_interruptible_timeout(wq, condition, __ret); \ + if (io) \ + __wait_io_event_interruptible_timeout(wq, condition, __ret);\ + else \ + __wait_event_interruptible_timeout(wq, condition, __ret);\ __ret; \ }) @@ -1150,11 +1077,15 @@ static int adreno_waittimestamp(struct kgsl_device *device, unsigned int msecs) { long status = 0; + uint io = 1; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (timestamp != adreno_dev->ringbuffer.timestamp && - timestamp_cmp(timestamp, - adreno_dev->ringbuffer.timestamp)) { + /* Don't wait forever, set a max value for now */ + if (msecs == -1) + msecs = adreno_dev->wait_timeout; + + if (timestamp_cmp(timestamp, adreno_dev->ringbuffer.timestamp) > 0) { KGSL_DRV_ERR(device, "Cannot wait for invalid ts: %x, " "rb->timestamp: %x\n", timestamp, adreno_dev->ringbuffer.timestamp); @@ -1162,13 +1093,20 @@ static int adreno_waittimestamp(struct kgsl_device *device, goto done; } if (!kgsl_check_timestamp(device, timestamp)) { + if (pwr->active_pwrlevel) { + int low_pwrlevel = pwr->num_pwrlevels - + KGSL_PWRLEVEL_LOW_OFFSET; + if (pwr->active_pwrlevel == low_pwrlevel) + io = 0; + } mutex_unlock(&device->mutex); /* We need to make sure that the process is placed in wait-q * before its condition is called */ - status = kgsl_wait_io_event_interruptible_timeout( + status = kgsl_wait_event_interruptible_timeout( device->wait_queue, kgsl_check_interrupt_timestamp(device, - timestamp), msecs_to_jiffies(msecs)); + timestamp), + msecs_to_jiffies(msecs), io); mutex_lock(&device->mutex); if (status > 0) @@ -1227,10 +1165,8 @@ static long adreno_ioctl(struct kgsl_device_private *dev_priv, context = kgsl_find_context(dev_priv, binbase->drawctxt_id); if (context) { - result = adreno_drawctxt_set_bin_base_offset( - dev_priv->device, - context, - binbase->offset); + adreno_drawctxt_set_bin_base_offset( + dev_priv->device, context, binbase->offset); } else { result = -EINVAL; KGSL_DRV_ERR(dev_priv->device, @@ -1299,33 +1235,36 @@ static void adreno_power_stats(struct kgsl_device *device, REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); } -static void __devinit adreno_getfunctable(struct kgsl_functable *ftbl) +void adreno_irqctrl(struct kgsl_device *device, int state) { - if (ftbl == NULL) - return; - ftbl->device_regread = adreno_regread; - ftbl->device_regwrite = adreno_regwrite; - ftbl->device_regread_isr = adreno_regread_isr; - ftbl->device_regwrite_isr = adreno_regwrite_isr; - ftbl->device_setstate = adreno_setstate; - ftbl->device_idle = adreno_idle; - ftbl->device_isidle = adreno_isidle; - ftbl->device_suspend_context = adreno_suspend_context; - ftbl->device_resume_context = adreno_resume_context; - ftbl->device_start = adreno_start; - ftbl->device_stop = adreno_stop; - ftbl->device_getproperty = 
adreno_getproperty; - ftbl->device_waittimestamp = adreno_waittimestamp; - ftbl->device_readtimestamp = adreno_readtimestamp; - ftbl->device_issueibcmds = adreno_ringbuffer_issueibcmds; - ftbl->device_drawctxt_create = adreno_drawctxt_create; - ftbl->device_drawctxt_destroy = adreno_drawctxt_destroy; - ftbl->device_ioctl = adreno_ioctl; - ftbl->device_setup_pt = adreno_setup_pt; - ftbl->device_cleanup_pt = adreno_cleanup_pt; - ftbl->device_power_stats = adreno_power_stats; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + adreno_dev->gpudev->irq_control(adreno_dev, state); } +static const struct kgsl_functable adreno_functable = { + /* Mandatory functions */ + .regread = adreno_regread, + .regwrite = adreno_regwrite, + .idle = adreno_idle, + .isidle = adreno_isidle, + .suspend_context = adreno_suspend_context, + .start = adreno_start, + .stop = adreno_stop, + .getproperty = adreno_getproperty, + .waittimestamp = adreno_waittimestamp, + .readtimestamp = adreno_readtimestamp, + .issueibcmds = adreno_ringbuffer_issueibcmds, + .ioctl = adreno_ioctl, + .setup_pt = adreno_setup_pt, + .cleanup_pt = adreno_cleanup_pt, + .power_stats = adreno_power_stats, + .irqctrl = adreno_irqctrl, + /* Optional functions */ + .setstate = adreno_setstate, + .drawctxt_create = adreno_drawctxt_create, + .drawctxt_destroy = adreno_drawctxt_destroy, +}; + static struct platform_device_id adreno_id_table[] = { { DEVICE_3D0_NAME, (kernel_ulong_t)&device_3d0.dev, }, { }, diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 74b3a845..51ee31a5 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,34 +1,19 @@ /* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
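[Note on the adreno.c hunk above: the runtime-populated adreno_getfunctable() is dropped in favour of the static const struct kgsl_functable adreno_functable. The idiom, shown here on a deliberately made-up two-hook structure (example_ops, example_start and example_stop are hypothetical, not KGSL symbols):

	struct example_dev;

	struct example_ops {
		int  (*start)(struct example_dev *dev);
		void (*stop)(struct example_dev *dev);
	};

	static int example_start(struct example_dev *dev) { return 0; }
	static void example_stop(struct example_dev *dev) { }

	/* Designated initializers leave any omitted (optional) hook NULL,
	 * and the const table can live in read-only data. */
	static const struct example_ops example_ops = {
		.start = example_start,
		.stop  = example_stop,
	};
]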
+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __ADRENO_H #define __ADRENO_H +#include "kgsl_device.h" #include "adreno_drawctxt.h" #include "adreno_ringbuffer.h" @@ -47,34 +32,11 @@ #define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0xDEADBEEF #define KGSL_CMD_IDENTIFIER 0xFEEDFACE -struct adreno_device { - struct kgsl_device dev; /* Must be first field in this struct */ - unsigned int chip_id; - struct kgsl_memregion gmemspace; - struct adreno_context *drawctxt_active; - wait_queue_head_t ib1_wq; - unsigned int *pfp_fw; - size_t pfp_fw_size; - unsigned int *pm4_fw; - size_t pm4_fw_size; - struct adreno_ringbuffer ringbuffer; - unsigned int mharb; -}; - -int adreno_idle(struct kgsl_device *device, unsigned int timeout); -void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, - unsigned int *value); -void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, - unsigned int value); -void adreno_regread_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value); -void adreno_regwrite_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value); - -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size); +#ifdef CONFIG_MSM_SCM +#define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) +#else +#define ADRENO_DEFAULT_PWRSCALE_POLICY NULL +#endif enum adreno_gpurev { ADRENO_REV_UNKNOWN = 0, @@ -84,38 +46,84 @@ enum adreno_gpurev { ADRENO_REV_A225 = 225, }; -enum adreno_gpurev adreno_get_rev(struct adreno_device *adreno_dev); +struct adreno_gpudev; + +struct adreno_device { + struct kgsl_device dev; /* Must be first field in this struct */ + unsigned int chip_id; + enum adreno_gpurev gpurev; + struct kgsl_memregion gmemspace; + struct adreno_context *drawctxt_active; + const char *pfp_fwfile; + unsigned int *pfp_fw; + size_t pfp_fw_size; + const char *pm4_fwfile; + unsigned int *pm4_fw; + size_t pm4_fw_size; + struct adreno_ringbuffer ringbuffer; + unsigned int mharb; + struct adreno_gpudev *gpudev; + unsigned int wait_timeout; +}; + +struct adreno_gpudev { + int (*ctxt_gpustate_shadow)(struct adreno_device *, + struct adreno_context *); + int (*ctxt_gmem_shadow)(struct adreno_device *, + struct adreno_context *); + void (*ctxt_save)(struct adreno_device *, struct adreno_context *); + void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); + irqreturn_t (*irq_handler)(struct adreno_device *); + void (*irq_control)(struct adreno_device *, int); +}; + +extern struct adreno_gpudev adreno_a2xx_gpudev; + +int adreno_idle(struct kgsl_device *device, unsigned int timeout); +void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value); +void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, + unsigned int value); + +uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, + unsigned int pt_base, unsigned int gpuaddr, unsigned int *size); static inline int adreno_is_a200(struct adreno_device *adreno_dev) { - return (adreno_get_rev(adreno_dev) == ADRENO_REV_A200); + return (adreno_dev->gpurev == ADRENO_REV_A200); } static inline int adreno_is_a205(struct adreno_device *adreno_dev) { - return (adreno_get_rev(adreno_dev) == ADRENO_REV_A200); + return (adreno_dev->gpurev == 
ADRENO_REV_A200); } static inline int adreno_is_a20x(struct adreno_device *adreno_dev) { - enum adreno_gpurev rev = adreno_get_rev(adreno_dev); - return (rev == ADRENO_REV_A200 || rev == ADRENO_REV_A205); + return (adreno_dev->gpurev == ADRENO_REV_A200 || + adreno_dev->gpurev == ADRENO_REV_A205); } static inline int adreno_is_a220(struct adreno_device *adreno_dev) { - return (adreno_get_rev(adreno_dev) == ADRENO_REV_A220); + return (adreno_dev->gpurev == ADRENO_REV_A220); } static inline int adreno_is_a225(struct adreno_device *adreno_dev) { - return (adreno_get_rev(adreno_dev) == ADRENO_REV_A225); + return (adreno_dev->gpurev == ADRENO_REV_A225); } static inline int adreno_is_a22x(struct adreno_device *adreno_dev) { - enum adreno_gpurev rev = adreno_get_rev(adreno_dev); - return (rev == ADRENO_REV_A220 || rev == ADRENO_REV_A225); + return (adreno_dev->gpurev == ADRENO_REV_A220 || + adreno_dev->gpurev == ADRENO_REV_A225); } +static inline int adreno_is_a2xx(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev <= ADRENO_REV_A225); +} + + #endif /*__ADRENO_H */ diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 487e1f7f..c878a2c2 100644 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -14,12 +14,13 @@ #include #include #include +#include #include "kgsl.h" #include "adreno_postmortem.h" #include "adreno.h" -#include "a200_reg.h" +#include "a2xx_reg.h" unsigned int kgsl_cff_dump_enable; int kgsl_pm_regs_enabled; @@ -130,7 +131,7 @@ static ssize_t kgsl_ib_dump_read( if (!ppos || !device || !kgsl_ib_base) return 0; - kgsl_regread(device, REG_MH_MMU_PT_BASE, &pt_base); + kgsl_regread(device, MH_MMU_PT_BASE, &pt_base); base_addr = kgsl_sharedmem_convertaddr(device, pt_base, kgsl_ib_base, &ib_memsize); @@ -395,8 +396,8 @@ static void kgsl_mh_reg_read_fill(struct kgsl_device *device, int i, int j; for (j = 0; j < linec; ++j) { - kgsl_regwrite(device, REG_MH_DEBUG_CTRL, i+j); - kgsl_regread(device, REG_MH_DEBUG_DATA, vals+j); + kgsl_regwrite(device, MH_DEBUG_CTRL, i+j); + kgsl_regread(device, MH_DEBUG_DATA, vals+j); } } @@ -420,6 +421,8 @@ static const struct file_operations kgsl_mh_debug_fops = { void adreno_debugfs_init(struct kgsl_device *device) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (!device->d_debugfs || IS_ERR(device->d_debugfs)) return; @@ -435,6 +438,8 @@ void adreno_debugfs_init(struct kgsl_device *device) &kgsl_mh_debug_fops); debugfs_create_file("cff_dump", 0644, device->d_debugfs, device, &kgsl_cff_dump_enable_fops); + debugfs_create_u32("wait_timeout", 0644, device->d_debugfs, + &adreno_dev->wait_timeout); /* Create post mortem control files */ diff --git a/drivers/gpu/msm/adreno_debugfs.h b/drivers/gpu/msm/adreno_debugfs.h index 680eb849..0356ac6e 100644 --- a/drivers/gpu/msm/adreno_debugfs.h +++ b/drivers/gpu/msm/adreno_debugfs.h @@ -1,29 +1,13 @@ /* Copyright (c) 2002,2008-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. 
nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __ADRENO_DEBUGFS_H diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index ab28b0d2..b7b0ea46 100644 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -10,157 +10,50 @@ * GNU General Public License for more details. * */ + #include #include "kgsl.h" - +#include "kgsl_sharedmem.h" #include "adreno.h" -#include "adreno_pm4types.h" -#include "adreno_drawctxt.h" -/* - * - * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers - * (34.5KB) - * - * +---------------------+------------+-------------+---+---------------------+ - * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow | - * +---------------------+------------+-------------+---+---------------------+ - * ________________________________/ \____________________ - * / | - * +--------------+-----------+------+-----------+------------------------+ - * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused | - * +--------------+-----------+------+-----------+------------------------+ - * - * 8K - ALU Constant Shadow (8K aligned) - * 4K - H/W Register Shadow (8K aligned) - * 4K - Command and Vertex Buffers - * - Indirect command buffer : Const/Reg restore - * - includes Loop & Bool const shadows - * - Indirect command buffer : Const/Reg save - * - Quad vertices & texture coordinates - * - Indirect command buffer : Gmem save - * - Indirect command buffer : Gmem restore - * - Unused (padding to 8KB boundary) - * <1K - Texture Constant Shadow (768 bytes) (8K aligned) - * 18K - Shader Instruction Shadow - * - 6K vertex (32 byte aligned) - * - 6K pixel (32 byte aligned) - * - 6K shared (32 byte aligned) - * - * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes - * 3 DWORDS per DWORD transfered, plus 1 DWORD for the shadow, for a total of - * 16 bytes per constant. If the texture constants were transfered this way, - * the Command & Vertex Buffers section would extend past the 16K boundary. 
- * By moving the texture constant shadow area to start at 16KB boundary, we - * only require approximately 40 bytes more memory, but are able to use the - * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up - * context switching. - * - * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool - * constants would require an additional 8KB each, for alignment.] - * - */ - -/* Constants */ - -#define ALU_CONSTANTS 2048 /* DWORDS */ -#define NUM_REGISTERS 1024 /* DWORDS */ -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES -#define CMD_BUFFER_LEN 9216 /* DWORDS */ -#else -#define CMD_BUFFER_LEN 3072 /* DWORDS */ -#endif -#define TEX_CONSTANTS (32*6) /* DWORDS */ -#define BOOL_CONSTANTS 8 /* DWORDS */ -#define LOOP_CONSTANTS 56 /* DWORDS */ -#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ - -#if defined(PM4_IM_STORE) -/* 96-bit instructions */ -#define SHADER_INSTRUCT (1<= 0x10000) { - exp += 16; - u >>= 16; - } - if (u >= 0x100) { - exp += 8; - u >>= 8; - } - if (u >= 0x10) { - exp += 4; - u >>= 4; - } - if (u >= 0x4) { - exp += 2; - u >>= 2; - } - if (u >= 0x2) { - exp += 1; - u >>= 1; - } + + exp = ilog2(uintval); /* Calculate fraction */ if (23 > exp) @@ -172,1018 +65,6 @@ unsigned int uint2float(unsigned int uintval) return exp | frac; } -/* context save (gmem -> sys) */ - -/* pre-compiled vertex shader program -* -* attribute vec4 P; -* void main(void) -* { -* gl_Position = P; -* } -*/ -#define GMEM2SYS_VTX_PGM_LEN 0x12 - -static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = { - 0x00011003, 0x00001000, 0xc2000000, - 0x00001004, 0x00001000, 0xc4000000, - 0x00001005, 0x00002000, 0x00000000, - 0x1cb81000, 0x00398a88, 0x00000003, - 0x140f803e, 0x00000000, 0xe2010100, - 0x14000000, 0x00000000, 0xe2000000 -}; - -/* pre-compiled fragment shader program -* -* precision highp float; -* uniform vec4 clear_color; -* void main(void) -* { -* gl_FragColor = clear_color; -* } -*/ - -#define GMEM2SYS_FRAG_PGM_LEN 0x0c - -static unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = { - 0x00000000, 0x1002c400, 0x10000000, - 0x00001003, 0x00002000, 0x00000000, - 0x140f8000, 0x00000000, 0x22000000, - 0x14000000, 0x00000000, 0xe2000000 -}; - -/* context restore (sys -> gmem) */ -/* pre-compiled vertex shader program -* -* attribute vec4 position; -* attribute vec4 texcoord; -* varying vec4 texcoord0; -* void main() -* { -* gl_Position = position; -* texcoord0 = texcoord; -* } -*/ - -#define SYS2GMEM_VTX_PGM_LEN 0x18 - -static unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = { - 0x00052003, 0x00001000, 0xc2000000, 0x00001005, - 0x00001000, 0xc4000000, 0x00001006, 0x10071000, - 0x20000000, 0x18981000, 0x0039ba88, 0x00000003, - 0x12982000, 0x40257b08, 0x00000002, 0x140f803e, - 0x00000000, 0xe2010100, 0x140f8000, 0x00000000, - 0xe2020200, 0x14000000, 0x00000000, 0xe2000000 -}; - -/* pre-compiled fragment shader program -* -* precision mediump float; -* uniform sampler2D tex0; -* varying vec4 texcoord0; -* void main() -* { -* gl_FragColor = texture2D(tex0, texcoord0.xy); -* } -*/ - -#define SYS2GMEM_FRAG_PGM_LEN 0x0f - -static unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = { - 0x00011002, 0x00001000, 0xc4000000, 0x00001003, - 0x10041000, 0x20000000, 0x10000001, 0x1ffff688, - 0x00000002, 0x140f8000, 0x00000000, 0xe2000000, - 0x14000000, 0x00000000, 0xe2000000 -}; - -/* shader texture constants (sysmem -> gmem) */ -#define SYS2GMEM_TEX_CONST_LEN 6 - -static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = { - /* Texture, 
FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat, - * RFMode=ZeroClamp-1, Dim=1:2d - */ - 0x00000002, /* Pitch = TBD */ - - /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0, - * NearestClamp=1:OGL Mode - */ - 0x00000800, /* Address[31:12] = TBD */ - - /* Width, Height, EndianSwap=0:None */ - 0, /* Width & Height = TBD */ - - /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point, - * Mip=2:BaseMap - */ - 0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23, - - /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0, - * Dim3d=0 - */ - 0, - - /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0, - * Dim=1:2d, MipPacking=0 - */ - 1 << 9 /* Mip Address[31:12] = TBD */ -}; - -/* quad for copying GMEM to context shadow */ -#define QUAD_LEN 12 - -static unsigned int gmem_copy_quad[QUAD_LEN] = { - 0x00000000, 0x00000000, 0x3f800000, - 0x00000000, 0x00000000, 0x3f800000, - 0x00000000, 0x00000000, 0x3f800000, - 0x00000000, 0x00000000, 0x3f800000 -}; - -#define TEXCOORD_LEN 8 - -static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = { - 0x00000000, 0x3f800000, - 0x3f800000, 0x3f800000, - 0x00000000, 0x00000000, - 0x3f800000, 0x00000000 -}; - -#define NUM_COLOR_FORMATS 13 - -static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = { - FMT_4_4_4_4, /* COLORX_4_4_4_4 */ - FMT_1_5_5_5, /* COLORX_1_5_5_5 */ - FMT_5_6_5, /* COLORX_5_6_5 */ - FMT_8, /* COLORX_8 */ - FMT_8_8, /* COLORX_8_8 */ - FMT_8_8_8_8, /* COLORX_8_8_8_8 */ - FMT_8_8_8_8, /* COLORX_S8_8_8_8 */ - FMT_16_FLOAT, /* COLORX_16_FLOAT */ - FMT_16_16_FLOAT, /* COLORX_16_16_FLOAT */ - FMT_16_16_16_16_FLOAT, /* COLORX_16_16_16_16_FLOAT */ - FMT_32_FLOAT, /* COLORX_32_FLOAT */ - FMT_32_32_FLOAT, /* COLORX_32_32_FLOAT */ - FMT_32_32_32_32_FLOAT, /* COLORX_32_32_32_32_FLOAT */ -}; - -static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = { - 2, /* COLORX_4_4_4_4 */ - 2, /* COLORX_1_5_5_5 */ - 2, /* COLORX_5_6_5 */ - 1, /* COLORX_8 */ - 2, /* COLORX_8_8 8*/ - 4, /* COLORX_8_8_8_8 */ - 4, /* COLORX_S8_8_8_8 */ - 2, /* COLORX_16_FLOAT */ - 4, /* COLORX_16_16_FLOAT */ - 8, /* COLORX_16_16_16_16_FLOAT */ - 4, /* COLORX_32_FLOAT */ - 8, /* COLORX_32_32_FLOAT */ - 16, /* COLORX_32_32_32_32_FLOAT */ -}; - -/* shader linkage info */ -#define SHADER_CONST_ADDR (11 * 6 + 3) - -/* gmem command buffer length */ -#define PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) - -/* functions */ -static void config_gmemsize(struct gmem_shadow_t *shadow, int gmem_size) -{ - int w = 64, h = 64; /* 16KB surface, minimum */ - - shadow->format = COLORX_8_8_8_8; - /* convert from bytes to 32-bit words */ - gmem_size = (gmem_size + 3) / 4; - - /* find the right surface size, close to a square. 
*/ - while (w * h < gmem_size) - if (w < h) - w *= 2; - else - h *= 2; - - shadow->width = w; - shadow->pitch = w; - shadow->height = h; - shadow->gmem_pitch = shadow->pitch; - - shadow->size = shadow->pitch * shadow->height * 4; -} - -static unsigned int gpuaddr(unsigned int *cmd, struct kgsl_memdesc *memdesc) -{ - return memdesc->gpuaddr + ((char *)cmd - (char *)memdesc->hostptr); -} - -static void -create_ib1(struct adreno_context *drawctxt, unsigned int *cmd, - unsigned int *start, unsigned int *end) -{ - cmd[0] = PM4_HDR_INDIRECT_BUFFER_PFD; - cmd[1] = gpuaddr(start, &drawctxt->gpustate); - cmd[2] = end - start; -} - -static unsigned int *program_shader(unsigned int *cmds, int vtxfrag, - unsigned int *shader_pgm, int dwords) -{ - /* load the patched vertex shader stream */ - *cmds++ = pm4_type3_packet(PM4_IM_LOAD_IMMEDIATE, 2 + dwords); - /* 0=vertex shader, 1=fragment shader */ - *cmds++ = vtxfrag; - /* instruction start & size (in 32-bit words) */ - *cmds++ = ((0 << 16) | dwords); - - memcpy(cmds, shader_pgm, dwords << 2); - cmds += dwords; - - return cmds; -} - -static unsigned int *reg_to_mem(unsigned int *cmds, uint32_t dst, - uint32_t src, int dwords) -{ - while (dwords-- > 0) { - *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmds++ = src++; - *cmds++ = dst; - dst += 4; - } - - return cmds; -} - -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - -static void build_reg_to_mem_range(unsigned int start, unsigned int end, - unsigned int **cmd, - struct adreno_context *drawctxt) -{ - unsigned int i = start; - - for (i = start; i <= end; i++) { - *(*cmd)++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *(*cmd)++ = i; - *(*cmd)++ = - ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) + - (i - 0x2000) * 4; - } -} - -#endif - -/* chicken restore */ -static unsigned int *build_chicken_restore_cmds( - struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - unsigned int *start = ctx->cmd; - unsigned int *cmds = start; - - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmds++ = 0; - - *cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1); - ctx->chicken_restore = gpuaddr(cmds, &drawctxt->gpustate); - *cmds++ = 0x00000000; - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds); - - return cmds; -} - -/* save h/w regs, alu constants, texture contants, etc. ... -* requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr -*/ -static void build_regsave_cmds(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - unsigned int *start = ctx->cmd; - unsigned int *cmd = start; - - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - /* Make sure the HW context has the correct register values - * before reading them. 
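[Note on the config_gmemsize() helper removed above: it sizes the GMEM shadow as the smallest near-square RGBA8888 surface that covers GMEM, doubling whichever dimension is smaller. A self-contained restatement of that rule (the function name is illustrative):

	static void size_gmem_shadow(unsigned int gmem_bytes,
				     unsigned int *width, unsigned int *height)
	{
		unsigned int words = (gmem_bytes + 3) / 4;	/* bytes -> 32-bit words */
		unsigned int w = 64, h = 64;			/* 16KB minimum surface */

		while (w * h < words) {
			if (w < h)
				w *= 2;
			else
				h *= 2;
		}
		*width = w;
		*height = h;
	}

For example, a 256KB GMEM works out to 65536 words and therefore a 256x256 shadow (256 x 256 x 4 bytes = 256KB).]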
*/ - *cmd++ = pm4_type3_packet(PM4_CONTEXT_UPDATE, 1); - *cmd++ = 0; -#endif - -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - /* Write HW registers into shadow */ - build_reg_to_mem_range(REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, - &cmd, drawctxt); - build_reg_to_mem_range(REG_COHER_DEST_BASE_0, - REG_PA_SC_SCREEN_SCISSOR_BR, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SC_WINDOW_OFFSET, - REG_PA_SC_WINDOW_SCISSOR_BR, - &cmd, drawctxt); - if (!adreno_is_a220(adreno_dev)) { - build_reg_to_mem_range(REG_VGT_MAX_VTX_INDX, REG_RB_FOG_COLOR, - &cmd, drawctxt); - } else { - build_reg_to_mem_range(REG_LEIA_PC_MAX_VTX_INDX, - REG_LEIA_PC_INDX_OFFSET, - &cmd, drawctxt); - build_reg_to_mem_range(REG_RB_COLOR_MASK, - REG_RB_FOG_COLOR, - &cmd, drawctxt); - } - build_reg_to_mem_range(REG_RB_STENCILREFMASK_BF, - REG_PA_CL_VPORT_ZOFFSET, - &cmd, drawctxt); - build_reg_to_mem_range(REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, - &cmd, drawctxt); - if (!adreno_is_a220(adreno_dev)) { - build_reg_to_mem_range(REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SU_POINT_SIZE, - REG_PA_SC_LINE_STIPPLE, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SC_VIZ_QUERY, REG_PA_SC_VIZ_QUERY, - &cmd, drawctxt); - } else { - build_reg_to_mem_range(REG_RB_DEPTHCONTROL, - REG_RB_COLORCONTROL, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_CL_CLIP_CNTL, - REG_PA_CL_VTE_CNTL, - &cmd, drawctxt); - build_reg_to_mem_range(REG_RB_MODECONTROL, - REG_LEIA_GRAS_CONTROL, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SU_POINT_SIZE, - REG_PA_SU_LINE_CNTL, - &cmd, drawctxt); - } - build_reg_to_mem_range(REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, - &cmd, drawctxt); - if (!adreno_is_a220(adreno_dev)) { - build_reg_to_mem_range(REG_VGT_VERTEX_REUSE_BLOCK_CNTL, - REG_RB_DEPTH_CLEAR, - &cmd, drawctxt); - } else { - build_reg_to_mem_range(REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL, - REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL, - &cmd, drawctxt); - build_reg_to_mem_range(REG_RB_COPY_CONTROL, - REG_RB_DEPTH_CLEAR, - &cmd, drawctxt); - } - build_reg_to_mem_range(REG_RB_SAMPLE_COUNT_CTL, - REG_RB_COLOR_DEST_MASK, - &cmd, drawctxt); - build_reg_to_mem_range(REG_PA_SU_POLY_OFFSET_FRONT_SCALE, - REG_PA_SU_POLY_OFFSET_BACK_OFFSET, - &cmd, drawctxt); - - /* Copy ALU constants */ - cmd = - reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, - REG_SQ_CONSTANT_0, ALU_CONSTANTS); - - /* Copy Tex constants */ - cmd = - reg_to_mem(cmd, - (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, - REG_SQ_FETCH_0, TEX_CONSTANTS); -#else - - /* Insert a wait for idle packet before reading the registers. - * This is to fix a hang/reset seen during stress testing. In this - * hang, CP encountered a timeout reading SQ's boolean constant - * register. There is logic in the HW that blocks reading of this - * register when the SQ block is not idle, which we believe is - * contributing to the hang.*/ - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - - /* H/w registers are already shadowed; just need to disable shadowing - * to prevent corruption. - */ - *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3); - *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; - *cmd++ = 4 << 16; /* regs, start=0 */ - *cmd++ = 0x0; /* count = 0 */ - - /* ALU constants are already shadowed; just need to disable shadowing - * to prevent corruption. 
- */ - *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3); - *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; - *cmd++ = 0 << 16; /* ALU, start=0 */ - *cmd++ = 0x0; /* count = 0 */ - - /* Tex constants are already shadowed; just need to disable shadowing - * to prevent corruption. - */ - *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3); - *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; - *cmd++ = 1 << 16; /* Tex, start=0 */ - *cmd++ = 0x0; /* count = 0 */ -#endif - - /* Need to handle some of the registers separately */ - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_SQ_GPR_MANAGEMENT; - *cmd++ = ctx->reg_values[0]; - - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_TP0_CHICKEN; - *cmd++ = ctx->reg_values[1]; - - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_RBBM_PM_OVERRIDE2; - *cmd++ = ctx->reg_values[2]; - - if (adreno_is_a220(adreno_dev)) { - unsigned int i; - unsigned int j = 3; - for (i = REG_LEIA_VSC_BIN_SIZE; i <= - REG_LEIA_VSC_PIPE_DATA_LENGTH_7; i++) { - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = i; - *cmd++ = ctx->reg_values[j]; - j++; - } - } - - /* Copy Boolean constants */ - cmd = reg_to_mem(cmd, ctx->bool_shadow, REG_SQ_CF_BOOLEANS, - BOOL_CONSTANTS); - - /* Copy Loop constants */ - cmd = reg_to_mem(cmd, ctx->loop_shadow, REG_SQ_CF_LOOP, LOOP_CONSTANTS); - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->reg_save, start, cmd); - - ctx->cmd = cmd; -} - -/*copy colour, depth, & stencil buffers from graphics memory to system memory*/ -static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx, - struct gmem_shadow_t *shadow) -{ - unsigned int *cmds = shadow->gmem_save_commands; - unsigned int *start = cmds; - /* Calculate the new offset based on the adjusted base */ - unsigned int bytesperpixel = format2bytesperpixel[shadow->format]; - unsigned int addr = shadow->gmemshadow.gpuaddr; - unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; - - /* Store TP0_CHICKEN register */ - *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmds++ = REG_TP0_CHICKEN; - if (ctx) - *cmds++ = ctx->chicken_restore; - else - cmds++; - - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmds++ = 0; - - /* Set TP0_CHICKEN to zero */ - *cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1); - *cmds++ = 0x00000000; - - /* Set PA_SC_AA_CONFIG to 0 */ - *cmds++ = pm4_type0_packet(REG_PA_SC_AA_CONFIG, 1); - *cmds++ = 0x00000000; - - /* program shader */ - - /* load shader vtx constants ... 5 dwords */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4); - *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR; - *cmds++ = 0; - /* valid(?) 
vtx constant flag & addr */ - *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; - /* limit = 12 dwords */ - *cmds++ = 0x00000030; - - /* Invalidate L2 cache to make sure vertices are updated */ - *cmds++ = pm4_type0_packet(REG_TC_CNTL_STATUS, 1); - *cmds++ = 0x1; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4); - *cmds++ = PM4_REG(REG_VGT_MAX_VTX_INDX); - *cmds++ = 0x00ffffff; /* REG_VGT_MAX_VTX_INDX */ - *cmds++ = 0x0; /* REG_VGT_MIN_VTX_INDX */ - *cmds++ = 0x00000000; /* REG_VGT_INDX_OFFSET */ - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SC_AA_MASK); - *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ - - /* load the patched vertex shader stream */ - cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); - - /* Load the patched fragment shader stream */ - cmds = - program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN); - - /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_SQ_PROGRAM_CNTL); - if (adreno_is_a220(adreno_dev)) - *cmds++ = 0x10018001; - else - *cmds++ = 0x10010001; - *cmds++ = 0x00000008; - - /* resolve */ - - /* PA_CL_VTE_CNTL */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_CL_VTE_CNTL); - /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ - *cmds++ = 0x00000b00; - - /* program surface info */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_RB_SURFACE_INFO); - *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ - - /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, - * Base=gmem_base - */ - /* gmem base assumed 4K aligned. */ - if (ctx) { - BUG_ON(ctx->gmem_base & 0xFFF); - *cmds++ = - (shadow-> - format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | ctx-> - gmem_base; - } else { - unsigned int temp = *cmds; - *cmds++ = (temp & ~RB_COLOR_INFO__COLOR_FORMAT_MASK) | - (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT); - } - - /* disable Z */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_DEPTHCONTROL); - if (adreno_is_a220(adreno_dev)) - *cmds++ = 0x08; - else - *cmds++ = 0; - - /* set REG_PA_SU_SC_MODE_CNTL - * Front_ptype = draw triangles - * Back_ptype = draw triangles - * Provoking vertex = last - */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SU_SC_MODE_CNTL); - *cmds++ = 0x00080240; - - /* Use maximum scissor values -- quad vertices already have the - * correct bounds */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_SC_SCREEN_SCISSOR_TL); - *cmds++ = (0 << 16) | 0; - *cmds++ = (0x1fff << 16) | (0x1fff); - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_SC_WINDOW_SCISSOR_TL); - *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); - *cmds++ = (0x1fff << 16) | (0x1fff); - - /* load the viewport so that z scale = clear depth and - * z offset = 0.0f - */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_CL_VPORT_ZSCALE); - *cmds++ = 0xbf800000; /* -1.0f */ - *cmds++ = 0x0; - - /* load the stencil ref value - * $AAM - do this later - */ - - /* load the COPY state */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 6); - *cmds++ = PM4_REG(REG_RB_COPY_CONTROL); - *cmds++ = 0; /* RB_COPY_CONTROL */ - *cmds++ = addr & 0xfffff000; /* RB_COPY_DEST_BASE */ - *cmds++ = shadow->pitch >> 5; /* RB_COPY_DEST_PITCH */ - - /* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither, - * MaskWrite:R=G=B=A=1 - */ - *cmds++ = 0x0003c008 | - 
(shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT); - /* Make sure we stay in offsetx field. */ - BUG_ON(offset & 0xfffff000); - *cmds++ = offset; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_MODECONTROL); - *cmds++ = 0x6; /* EDRAM copy */ - - if (adreno_is_a220(adreno_dev)) { - *cmds++ = 0xc0043600; /* packet 3 3D_DRAW_INDX_2 */ - *cmds++ = 0x0; - *cmds++ = 0x00004046; /* tristrip */ - *cmds++ = 0x00000004; /* NUM_INDICES */ - *cmds++ = 0x00010000; /* index: 0x00, 0x01 */ - *cmds++ = 0x00030002; /* index: 0x02, 0x03 */ - } else { - /* queue the draw packet */ - *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2); - *cmds++ = 0; /* viz query info. */ - /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ - *cmds++ = 0x00030088; - } - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, shadow->gmem_save, start, cmds); - - return cmds; -} - -/* context restore */ - -/*copy colour, depth, & stencil buffers from system memory to graphics memory*/ -static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx, - struct gmem_shadow_t *shadow) -{ - unsigned int *cmds = shadow->gmem_restore_commands; - unsigned int *start = cmds; - - /* Store TP0_CHICKEN register */ - *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmds++ = REG_TP0_CHICKEN; - if (ctx) - *cmds++ = ctx->chicken_restore; - else - cmds++; - - *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmds++ = 0; - - /* Set TP0_CHICKEN to zero */ - *cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1); - *cmds++ = 0x00000000; - - /* Set PA_SC_AA_CONFIG to 0 */ - *cmds++ = pm4_type0_packet(REG_PA_SC_AA_CONFIG, 1); - *cmds++ = 0x00000000; - /* shader constants */ - - /* vertex buffer constants */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 7); - - *cmds++ = (0x1 << 16) | (9 * 6); - /* valid(?) vtx constant flag & addr */ - *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; - /* limit = 12 dwords */ - *cmds++ = 0x00000030; - /* valid(?) 
vtx constant flag & addr */ - *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; - /* limit = 8 dwords */ - *cmds++ = 0x00000020; - *cmds++ = 0; - *cmds++ = 0; - - /* Invalidate L2 cache to make sure vertices are updated */ - *cmds++ = pm4_type0_packet(REG_TC_CNTL_STATUS, 1); - *cmds++ = 0x1; - - cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN); - - /* Load the patched fragment shader stream */ - cmds = - program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN); - - /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_SQ_PROGRAM_CNTL); - *cmds++ = 0x10030002; - *cmds++ = 0x00000008; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SC_AA_MASK); - *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ - - if (!adreno_is_a220(adreno_dev)) { - /* PA_SC_VIZ_QUERY */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SC_VIZ_QUERY); - *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */ - } - - /* RB_COLORCONTROL */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_COLORCONTROL); - *cmds++ = 0x00000c20; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4); - *cmds++ = PM4_REG(REG_VGT_MAX_VTX_INDX); - *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */ - *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */ - *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */ - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL); - *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */ - *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */ - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_SQ_INTERPOLATOR_CNTL); - *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */ - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SC_AA_CONFIG); - *cmds++ = 0x00000000; /* REG_PA_SC_AA_CONFIG */ - - /* set REG_PA_SU_SC_MODE_CNTL - * Front_ptype = draw triangles - * Back_ptype = draw triangles - * Provoking vertex = last - */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_SU_SC_MODE_CNTL); - *cmds++ = 0x00080240; - - /* texture constants */ - *cmds++ = - pm4_type3_packet(PM4_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1)); - *cmds++ = (0x1 << 16) | (0 * 6); - memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2); - cmds[0] |= (shadow->pitch >> 5) << 22; - cmds[1] |= - shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format]; - cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13; - cmds += SYS2GMEM_TEX_CONST_LEN; - - /* program surface info */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_RB_SURFACE_INFO); - *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ - - /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, - * Base=gmem_base - */ - if (ctx) - *cmds++ = - (shadow-> - format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | ctx-> - gmem_base; - else { - unsigned int temp = *cmds; - *cmds++ = (temp & ~RB_COLOR_INFO__COLOR_FORMAT_MASK) | - (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT); - } - - /* RB_DEPTHCONTROL */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_DEPTHCONTROL); - - if (adreno_is_a220(adreno_dev)) - *cmds++ = 8; /* disable Z */ - else - *cmds++ = 0; /* disable Z */ - - /* Use maximum scissor values -- quad vertices already - * have the correct bounds */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_SC_SCREEN_SCISSOR_TL); - *cmds++ 
= (0 << 16) | 0; - *cmds++ = ((0x1fff) << 16) | 0x1fff; - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_SC_WINDOW_SCISSOR_TL); - *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); - *cmds++ = ((0x1fff) << 16) | 0x1fff; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_PA_CL_VTE_CNTL); - /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ - *cmds++ = 0x00000b00; - - /*load the viewport so that z scale = clear depth and z offset = 0.0f */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_PA_CL_VPORT_ZSCALE); - *cmds++ = 0xbf800000; - *cmds++ = 0x0; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_COLOR_MASK); - *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_COLOR_DEST_MASK); - *cmds++ = 0xffffffff; - - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3); - *cmds++ = PM4_REG(REG_SQ_WRAPPING_0); - *cmds++ = 0x00000000; - *cmds++ = 0x00000000; - - /* load the stencil ref value - * $AAM - do this later - */ - *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2); - *cmds++ = PM4_REG(REG_RB_MODECONTROL); - /* draw pixels with color and depth/stencil component */ - *cmds++ = 0x4; - - if (adreno_is_a220(adreno_dev)) { - *cmds++ = 0xc0043600; /* packet 3 3D_DRAW_INDX_2 */ - *cmds++ = 0x0; - *cmds++ = 0x00004046; /* tristrip */ - *cmds++ = 0x00000004; /* NUM_INDICES */ - *cmds++ = 0x00010000; /* index: 0x00, 0x01 */ - *cmds++ = 0x00030002; /* index: 0x02, 0x03 */ - } else { - /* queue the draw packet */ - *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2); - *cmds++ = 0; /* viz query info. */ - /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ - *cmds++ = 0x00030088; - } - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, shadow->gmem_restore, start, cmds); - - return cmds; -} - -/* restore h/w regs, alu constants, texture constants, etc. ... 
*/ -static unsigned *reg_range(unsigned int *cmd, unsigned int start, - unsigned int end) -{ - *cmd++ = PM4_REG(start); /* h/w regs, start addr */ - *cmd++ = end - start + 1; /* count */ - return cmd; -} - -static void build_regrestore_cmds(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - unsigned int *start = ctx->cmd; - unsigned int *cmd = start; - - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - - /* H/W Registers */ - /* deferred pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, ???); */ - cmd++; -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - /* Force mismatch */ - *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; -#else - *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; -#endif - - if (!adreno_is_a220(adreno_dev)) { - cmd = reg_range(cmd, REG_RB_SURFACE_INFO, - REG_PA_SC_SCREEN_SCISSOR_BR); - } else { - cmd = reg_range(cmd, REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO); - cmd = reg_range(cmd, REG_COHER_DEST_BASE_0, - REG_PA_SC_SCREEN_SCISSOR_BR); - } - cmd = reg_range(cmd, REG_PA_SC_WINDOW_OFFSET, - REG_PA_SC_WINDOW_SCISSOR_BR); - if (!adreno_is_a220(adreno_dev)) { - cmd = reg_range(cmd, REG_VGT_MAX_VTX_INDX, - REG_PA_CL_VPORT_ZOFFSET); - } else { - cmd = reg_range(cmd, REG_LEIA_PC_MAX_VTX_INDX, - REG_LEIA_PC_INDX_OFFSET); - cmd = reg_range(cmd, REG_RB_COLOR_MASK, REG_RB_FOG_COLOR); - cmd = reg_range(cmd, REG_RB_STENCILREFMASK_BF, - REG_PA_CL_VPORT_ZOFFSET); - } - cmd = reg_range(cmd, REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1); - if (!adreno_is_a220(adreno_dev)) { - cmd = reg_range(cmd, REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL); - cmd = reg_range(cmd, REG_PA_SU_POINT_SIZE, - REG_PA_SC_VIZ_QUERY); /*REG_VGT_ENHANCE */ - cmd = reg_range(cmd, REG_PA_SC_LINE_CNTL, - REG_RB_COLOR_DEST_MASK); - } else { - cmd = reg_range(cmd, REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL); - cmd = reg_range(cmd, REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL); - cmd = reg_range(cmd, REG_RB_MODECONTROL, REG_LEIA_GRAS_CONTROL); - cmd = reg_range(cmd, REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL); - cmd = reg_range(cmd, REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST); - cmd = reg_range(cmd, REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK); - cmd = reg_range(cmd, REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL, - REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL); - cmd = reg_range(cmd, REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR); - cmd = reg_range(cmd, REG_RB_SAMPLE_COUNT_CTL, - REG_RB_COLOR_DEST_MASK); - } - cmd = reg_range(cmd, REG_PA_SU_POLY_OFFSET_FRONT_SCALE, - REG_PA_SU_POLY_OFFSET_BACK_OFFSET); - - /* Now we know how many register blocks we have, we can compute command - * length - */ - start[2] = - pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); - /* Enable shadowing for the entire register block. */ -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. 
*/ -#else - start[4] |= (1 << 24) | (4 << 16); -#endif - - /* Need to handle some of the registers separately */ - *cmd++ = pm4_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); - ctx->reg_values[0] = gpuaddr(cmd, &drawctxt->gpustate); - *cmd++ = 0x00040400; - - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - *cmd++ = pm4_type0_packet(REG_TP0_CHICKEN, 1); - ctx->reg_values[1] = gpuaddr(cmd, &drawctxt->gpustate); - *cmd++ = 0x00000000; - - *cmd++ = pm4_type0_packet(REG_RBBM_PM_OVERRIDE2, 1); - ctx->reg_values[2] = gpuaddr(cmd, &drawctxt->gpustate); - if (!adreno_is_a220(adreno_dev)) - *cmd++ = 0x00000000; - else - *cmd++ = 0x80; - - if (adreno_is_a220(adreno_dev)) { - unsigned int i; - unsigned int j = 3; - for (i = REG_LEIA_VSC_BIN_SIZE; i <= - REG_LEIA_VSC_PIPE_DATA_LENGTH_7; i++) { - *cmd++ = pm4_type0_packet(i, 1); - ctx->reg_values[j] = gpuaddr(cmd, &drawctxt->gpustate); - *cmd++ = 0x00000000; - j++; - } - } - - /* ALU Constants */ - *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3); - *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */ -#else - *cmd++ = (1 << 24) | (0 << 16) | 0; -#endif - *cmd++ = ALU_CONSTANTS; - - /* Texture Constants */ - *cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3); - *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; -#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES - /* Disable shadowing */ - *cmd++ = (0 << 24) | (1 << 16) | 0; -#else - *cmd++ = (1 << 24) | (1 << 16) | 0; -#endif - *cmd++ = TEX_CONSTANTS; - - /* Boolean Constants */ - *cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + BOOL_CONSTANTS); - *cmd++ = (2 << 16) | 0; - - /* the next BOOL_CONSTANT dwords is the shadow area for - * boolean constants. - */ - ctx->bool_shadow = gpuaddr(cmd, &drawctxt->gpustate); - cmd += BOOL_CONSTANTS; - - /* Loop Constants */ - *cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + LOOP_CONSTANTS); - *cmd++ = (3 << 16) | 0; - - /* the next LOOP_CONSTANTS dwords is the shadow area for - * loop constants. 
- */ - ctx->loop_shadow = gpuaddr(cmd, &drawctxt->gpustate); - cmd += LOOP_CONSTANTS; - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); - - ctx->cmd = cmd; -} - -/* quad for saving/restoring gmem */ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) { /* set vertex buffer values */ @@ -1192,280 +73,62 @@ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) gmem_copy_quad[4] = uint2float(shadow->height); gmem_copy_quad[9] = uint2float(shadow->width); - gmem_copy_quad[0] = uint2float(0); - gmem_copy_quad[6] = uint2float(0); - gmem_copy_quad[7] = uint2float(0); - gmem_copy_quad[10] = uint2float(0); + gmem_copy_quad[0] = 0; + gmem_copy_quad[6] = 0; + gmem_copy_quad[7] = 0; + gmem_copy_quad[10] = 0; memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2); memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, - TEXCOORD_LEN << 2); + TEXCOORD_LEN << 2); } +/** + * build_quad_vtxbuff - Create a quad for saving/restoring GMEM + * @ context - Pointer to the context being created + * @ shadow - Pointer to the GMEM shadow structure + * @ incmd - Pointer to pointer to the temporary command buffer + */ + /* quad for saving/restoring gmem */ -static void build_quad_vtxbuff(struct adreno_context *drawctxt, - struct tmp_ctx *ctx, struct gmem_shadow_t *shadow) +void build_quad_vtxbuff(struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow, unsigned int **incmd) { - unsigned int *cmd = ctx->cmd; + unsigned int *cmd = *incmd; /* quad vertex buffer location (in GPU space) */ shadow->quad_vertices.hostptr = cmd; - shadow->quad_vertices.gpuaddr = gpuaddr(cmd, &drawctxt->gpustate); + shadow->quad_vertices.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); cmd += QUAD_LEN; /* tex coord buffer location (in GPU space) */ shadow->quad_texcoords.hostptr = cmd; - shadow->quad_texcoords.gpuaddr = gpuaddr(cmd, &drawctxt->gpustate); + shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); cmd += TEXCOORD_LEN; set_gmem_copy_quad(shadow); - - ctx->cmd = cmd; + *incmd = cmd; } -static void -build_shader_save_restore_cmds(struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - unsigned int *cmd = ctx->cmd; - unsigned int *save, *restore, *fixup; -#if defined(PM4_IM_STORE) - unsigned int *startSizeVtx, *startSizePix, *startSizeShared; -#endif - unsigned int *partition1; - unsigned int *shaderBases, *partition2; - -#if defined(PM4_IM_STORE) - /* compute vertex, pixel and shared instruction shadow GPU addresses */ - ctx->shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; - ctx->shader_pixel = ctx->shader_vertex + SHADER_SHADOW_SIZE; - ctx->shader_shared = ctx->shader_pixel + SHADER_SHADOW_SIZE; -#endif - - /* restore shader partitioning and instructions */ - - restore = cmd; /* start address */ - - /* Invalidate Vertex & Pixel instruction code address and sizes */ - *cmd++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1); - *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ - - /* Restore previous shader vertex & pixel instruction bases. */ - *cmd++ = pm4_type3_packet(PM4_SET_SHADER_BASES, 1); - shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */ - - /* write the shader partition information to a scratch register */ - *cmd++ = pm4_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); - partition1 = cmd++; /* TBD #4a: partition info (from save) */ - -#if defined(PM4_IM_STORE) - /* load vertex shader instructions from the shadow. 
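[Note on build_quad_vtxbuff() above: it now publishes the quad and texcoord buffers through the shared virt2gpu() helper instead of the file-local gpuaddr(). Both perform the same translation of a CPU pointer inside a shared-memory descriptor into its GPU virtual address; a sketch mirroring the removed local helper (the function name here is illustrative):

	static unsigned int cpu_ptr_to_gpuaddr(void *ptr, struct kgsl_memdesc *memdesc)
	{
		/* GPU base address plus the pointer's byte offset into the
		 * CPU mapping of the same buffer. */
		return memdesc->gpuaddr +
			((char *)ptr - (char *)memdesc->hostptr);
	}
]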
*/ - *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2); - *cmd++ = ctx->shader_vertex + 0x0; /* 0x0 = Vertex */ - startSizeVtx = cmd++; /* TBD #1: start/size (from save) */ - - /* load pixel shader instructions from the shadow. */ - *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2); - *cmd++ = ctx->shader_pixel + 0x1; /* 0x1 = Pixel */ - startSizePix = cmd++; /* TBD #2: start/size (from save) */ - - /* load shared shader instructions from the shadow. */ - *cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2); - *cmd++ = ctx->shader_shared + 0x2; /* 0x2 = Shared */ - startSizeShared = cmd++; /* TBD #3: start/size (from save) */ -#endif - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); - - /* - * fixup SET_SHADER_BASES data - * - * since self-modifying PM4 code is being used here, a seperate - * command buffer is used for this fixup operation, to ensure the - * commands are not read by the PM4 engine before the data fields - * have been written. - */ - - fixup = cmd; /* start address */ - - /* write the shader partition information to a scratch register */ - *cmd++ = pm4_type0_packet(REG_SCRATCH_REG2, 1); - partition2 = cmd++; /* TBD #4b: partition info (from save) */ - - /* mask off unused bits, then OR with shader instruction memory size */ - *cmd++ = pm4_type3_packet(PM4_REG_RMW, 3); - *cmd++ = REG_SCRATCH_REG2; - /* AND off invalid bits. */ - *cmd++ = 0x0FFF0FFF; - /* OR in instruction memory size */ - *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); - - /* write the computed value to the SET_SHADER_BASES data field */ - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_SCRATCH_REG2; - /* TBD #5: shader bases (to restore) */ - *cmd++ = gpuaddr(shaderBases, &drawctxt->gpustate); - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); - - /* save shader partitioning and instructions */ - - save = cmd; /* start address */ - - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - - /* fetch the SQ_INST_STORE_MANAGMENT register value, - * store the value in the data fields of the SET_CONSTANT commands - * above. 
- */ - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_SQ_INST_STORE_MANAGMENT; - /* TBD #4a: partition info (to restore) */ - *cmd++ = gpuaddr(partition1, &drawctxt->gpustate); - *cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2); - *cmd++ = REG_SQ_INST_STORE_MANAGMENT; - /* TBD #4b: partition info (to fixup) */ - *cmd++ = gpuaddr(partition2, &drawctxt->gpustate); - -#if defined(PM4_IM_STORE) - - /* store the vertex shader instructions */ - *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2); - *cmd++ = ctx->shader_vertex + 0x0; /* 0x0 = Vertex */ - /* TBD #1: start/size (to restore) */ - *cmd++ = gpuaddr(startSizeVtx, &drawctxt->gpustate); - - /* store the pixel shader instructions */ - *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2); - *cmd++ = ctx->shader_pixel + 0x1; /* 0x1 = Pixel */ - /* TBD #2: start/size (to restore) */ - *cmd++ = gpuaddr(startSizePix, &drawctxt->gpustate); - - /* store the shared shader instructions if vertex base is nonzero */ - - *cmd++ = pm4_type3_packet(PM4_IM_STORE, 2); - *cmd++ = ctx->shader_shared + 0x2; /* 0x2 = Shared */ - /* TBD #3: start/size (to restore) */ - *cmd++ = gpuaddr(startSizeShared, &drawctxt->gpustate); - -#endif - - *cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1); - *cmd++ = 0; - - /* create indirect buffer command for above command sequence */ - create_ib1(drawctxt, drawctxt->shader_save, save, cmd); - - ctx->cmd = cmd; -} - -/* create buffers for saving/restoring registers, constants, & GMEM */ -static int -create_gpustate_shadow(struct kgsl_device *device, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int result; - - /* Allocate vmalloc memory to store the gpustate */ - result = kgsl_sharedmem_vmalloc(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); - - if (result) - return result; - - drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; - - /* Blank out h/w register, constant, and command buffer shadows. */ - kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); - - /* set-up command and vertex buffer pointers */ - ctx->cmd = ctx->start - = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); - - /* build indirect command buffers to save & restore regs/constants */ - adreno_idle(device, KGSL_TIMEOUT_DEFAULT); - build_regrestore_cmds(adreno_dev, drawctxt, ctx); - build_regsave_cmds(adreno_dev, drawctxt, ctx); - - build_shader_save_restore_cmds(drawctxt, ctx); - - kgsl_cache_range_op(&drawctxt->gpustate, - KGSL_CACHE_OP_FLUSH); - - return 0; -} - -/* create buffers for saving/restoring registers, constants, & GMEM */ -static int -create_gmem_shadow(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - struct tmp_ctx *ctx) -{ - struct kgsl_device *device = &adreno_dev->dev; - int result; - - config_gmemsize(&drawctxt->context_gmem_shadow, - adreno_dev->gmemspace.sizebytes); - ctx->gmem_base = adreno_dev->gmemspace.gpu_base; - - result = kgsl_sharedmem_vmalloc( - &drawctxt->context_gmem_shadow.gmemshadow, - drawctxt->pagetable, - drawctxt->context_gmem_shadow.size); - - if (result) - return result; - - /* we've allocated the shadow, when swapped out, GMEM must be saved. */ - drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; - - /* blank out gmem shadow. 
*/ - kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, - drawctxt->context_gmem_shadow.size); - - /* build quad vertex buffer */ - build_quad_vtxbuff(drawctxt, ctx, &drawctxt->context_gmem_shadow); - - /* build TP0_CHICKEN register restore command buffer */ - ctx->cmd = build_chicken_restore_cmds(drawctxt, ctx); - - /* build indirect command buffers to save & restore gmem */ - /* Idle because we are reading PM override registers */ - adreno_idle(device, KGSL_TIMEOUT_DEFAULT); - drawctxt->context_gmem_shadow.gmem_save_commands = ctx->cmd; - ctx->cmd = - build_gmem2sys_cmds(adreno_dev, drawctxt, ctx, - &drawctxt->context_gmem_shadow); - drawctxt->context_gmem_shadow.gmem_restore_commands = ctx->cmd; - ctx->cmd = - build_sys2gmem_cmds(adreno_dev, drawctxt, ctx, - &drawctxt->context_gmem_shadow); - - kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, - KGSL_CACHE_OP_FLUSH); - - return 0; -} - -/* create a new drawing context */ - -int -adreno_drawctxt_create(struct kgsl_device_private *dev_priv, uint32_t flags, - struct kgsl_context *context) +/** + * adreno_drawctxt_create - create a new adreno draw context + * @device - KGSL device to create the context on + * @pagetable - Pagetable for the context + * @context- Generic KGSL context structure + * @flags - flags for the context (passed from user space) + * + * Create a new draw context for the 3D core. Return 0 on success, + * or error code on failure. + */ +int adreno_drawctxt_create(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_context *context, uint32_t flags) { struct adreno_context *drawctxt; - struct kgsl_device *device = dev_priv->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_pagetable *pagetable = dev_priv->process_priv->pagetable; - struct tmp_ctx ctx; int ret; drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL); @@ -1476,25 +139,23 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv, uint32_t flags, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; - ret = create_gpustate_shadow(device, drawctxt, &ctx); + /* FIXME: Deal with preambles */ + + ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt); if (ret) goto err; /* Save the shader instruction memory on context switching */ drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; - memset(&drawctxt->context_gmem_shadow.gmemshadow, - 0, sizeof(struct kgsl_memdesc)); - if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) { /* create gmem shadow */ - ret = create_gmem_shadow(adreno_dev, drawctxt, &ctx); + ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev, + drawctxt); if (ret != 0) goto err; } - BUG_ON(ctx.cmd - ctx.start > CMD_BUFFER_LEN); - context->devctxt = drawctxt; return 0; err: @@ -1503,16 +164,25 @@ err: return ret; } +/** + * adreno_drawctxt_destroy - destroy a draw context + * @device - KGSL device that owns the context + * @context- Generic KGSL context container for the context + * + * Destroy an existing context. Return 0 on success or error + * code on failure. 
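The rewritten create path above no longer builds the shadow buffers inline; it calls through adreno_dev->gpudev so each GPU family can supply its own context-shadow routines, and the context-switch hunk further down uses the same table for ctxt_save()/ctxt_restore(). The operations structure itself is defined elsewhere in the series; a hedged sketch of the shape these call sites imply (the struct name and anything beyond these four fields are assumptions):

    /* Hedged sketch only: signatures inferred from how the hunks in this
     * patch call through adreno_dev->gpudev; not the real definition. */
    struct adreno_device;
    struct adreno_context;

    struct adreno_gpudev_ops_sketch {
        int (*ctxt_gpustate_shadow)(struct adreno_device *adreno_dev,
                                    struct adreno_context *drawctxt);
        int (*ctxt_gmem_shadow)(struct adreno_device *adreno_dev,
                                struct adreno_context *drawctxt);
        void (*ctxt_save)(struct adreno_device *adreno_dev,
                          struct adreno_context *drawctxt);
        void (*ctxt_restore)(struct adreno_device *adreno_dev,
                             struct adreno_context *drawctxt);
    };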
+ */ + /* destroy a drawing context */ -int adreno_drawctxt_destroy(struct kgsl_device *device, +void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_context *drawctxt = context->devctxt; if (drawctxt == NULL) - return -EINVAL; + return; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { @@ -1534,35 +204,41 @@ int adreno_drawctxt_destroy(struct kgsl_device *device, kfree(drawctxt); context->devctxt = NULL; - - return 0; } -/* set bin base offset */ -int adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, +/** + * adreno_drawctxt_set_bin_base_offset - set bin base offset for the context + * @device - KGSL device that owns the context + * @context- Generic KGSL context container for the context + * @offset - Offset to set + * + * Set the bin base offset for A2XX devices. Not valid for A3XX devices. + */ + +void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, struct kgsl_context *context, unsigned int offset) { struct adreno_context *drawctxt = context->devctxt; - if (drawctxt == NULL) - return -EINVAL; - - drawctxt->bin_base_offset = offset; - - return 0; + if (drawctxt) + drawctxt->bin_base_offset = offset; } -/* switch drawing contexts */ -void -adreno_drawctxt_switch(struct adreno_device *adreno_dev, - struct adreno_context *drawctxt, - unsigned int flags) +/** + * adreno_drawctxt_switch - switch the current draw context + * @adreno_dev - The 3D device that owns the context + * @drawctxt - the 3D context to switch to + * @flags - Flags to accompany the switch (from user space) + * + * Switch the current draw context + */ + +void adreno_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + unsigned int flags) { - struct adreno_context *active_ctxt = - adreno_dev->drawctxt_active; struct kgsl_device *device = &adreno_dev->dev; - unsigned int cmds[5]; if (drawctxt) { if (flags & KGSL_CONTEXT_SAVE_GMEM) @@ -1573,109 +249,18 @@ adreno_drawctxt_switch(struct adreno_device *adreno_dev, /* Remove GMEM saving flag from the context */ drawctxt->flags &= ~CTXT_FLAGS_GMEM_SAVE; } + /* already current? */ - if (active_ctxt == drawctxt) + if (adreno_dev->drawctxt_active == drawctxt) return; KGSL_CTXT_INFO(device, "from %p to %p flags %d\n", adreno_dev->drawctxt_active, drawctxt, flags); - /* save old context*/ - if (active_ctxt && active_ctxt->flags & CTXT_FLAGS_GPU_HANG) - KGSL_CTXT_WARN(device, - "Current active context has caused gpu hang\n"); - if (active_ctxt != NULL) { - KGSL_CTXT_INFO(device, - "active_ctxt flags %08x\n", active_ctxt->flags); - /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, - active_ctxt->reg_save, 3); - - if (active_ctxt->flags & CTXT_FLAGS_SHADER_SAVE) { - /* save shader partitioning and instructions. */ - adreno_ringbuffer_issuecmds(device, - KGSL_CMD_FLAGS_PMODE, - active_ctxt->shader_save, 3); - - /* fixup shader partitioning parameter for - * SET_SHADER_BASES. - */ - adreno_ringbuffer_issuecmds(device, 0, - active_ctxt->shader_fixup, 3); - - active_ctxt->flags |= CTXT_FLAGS_SHADER_RESTORE; - } - - if (active_ctxt->flags & CTXT_FLAGS_GMEM_SAVE - && active_ctxt->flags & CTXT_FLAGS_GMEM_SHADOW) { - /* save gmem. - * (note: changes shader. shader must already be saved.) 
- */ - adreno_ringbuffer_issuecmds(device, - KGSL_CMD_FLAGS_PMODE, - active_ctxt->context_gmem_shadow.gmem_save, 3); - - /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, - active_ctxt->chicken_restore, 3); - - active_ctxt->flags |= CTXT_FLAGS_GMEM_RESTORE; - } - } + /* Save the old context */ + adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); + /* Set the new context */ adreno_dev->drawctxt_active = drawctxt; - - /* restore new context */ - if (drawctxt != NULL) { - - KGSL_CTXT_INFO(device, - "drawctxt flags %08x\n", drawctxt->flags); - cmds[0] = pm4_nop_packet(1); - cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; - cmds[2] = pm4_type3_packet(PM4_MEM_WRITE, 2); - cmds[3] = device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context); - cmds[4] = (unsigned int)adreno_dev->drawctxt_active; - adreno_ringbuffer_issuecmds(device, 0, cmds, 5); - kgsl_mmu_setstate(device, drawctxt->pagetable); - -#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP - kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, - drawctxt->gpustate.gpuaddr, LCC_SHADOW_SIZE + - REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, - false); -#endif - - /* restore gmem. - * (note: changes shader. shader must not already be restored.) - */ - if (drawctxt->flags & CTXT_FLAGS_GMEM_RESTORE) { - adreno_ringbuffer_issuecmds(device, - KGSL_CMD_FLAGS_PMODE, - drawctxt->context_gmem_shadow.gmem_restore, 3); - - /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, - drawctxt->chicken_restore, 3); - - drawctxt->flags &= ~CTXT_FLAGS_GMEM_RESTORE; - } - - /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, - drawctxt->reg_restore, 3); - - /* restore shader instructions & partitioning. */ - if (drawctxt->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, 0, - drawctxt->shader_restore, 3); - } - - cmds[0] = pm4_type3_packet(PM4_SET_BIN_BASE_OFFSET, 1); - cmds[1] = drawctxt->bin_base_offset; - if (!adreno_is_a220(adreno_dev)) - adreno_ringbuffer_issuecmds(device, 0, cmds, 2); - - } else - kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h index 8ea40436..3c3a8536 100644 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,36 +1,20 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
* - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __ADRENO_DRAWCTXT_H #define __ADRENO_DRAWCTXT_H -#include "a200_reg.h" -#include "a220_reg.h" +#include "adreno_pm4types.h" +#include "a2xx_reg.h" /* Flags */ @@ -95,19 +79,73 @@ struct adreno_context { struct gmem_shadow_t context_gmem_shadow; }; +int adreno_drawctxt_create(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_context *context, + uint32_t flags); -int adreno_drawctxt_create(struct kgsl_device_private *dev_priv, - uint32_t flags, - struct kgsl_context *context); - -int adreno_drawctxt_destroy(struct kgsl_device *device, +void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context); void adreno_drawctxt_switch(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, unsigned int flags); -int adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, +void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, struct kgsl_context *context, unsigned int offset); +/* GPU context switch helper functions */ + +void build_quad_vtxbuff(struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow, unsigned int **incmd); + +unsigned int uint2float(unsigned int); + +static inline unsigned int virt2gpu(unsigned int *cmd, + struct kgsl_memdesc *memdesc) +{ + return memdesc->gpuaddr + ((char *) cmd - (char *) memdesc->hostptr); +} + +static inline void create_ib1(struct adreno_context *drawctxt, + unsigned int *cmd, + unsigned int *start, + unsigned int *end) +{ + cmd[0] = CP_HDR_INDIRECT_BUFFER_PFD; + cmd[1] = virt2gpu(start, &drawctxt->gpustate); + cmd[2] = end - start; +} + + +static inline unsigned int *reg_range(unsigned int *cmd, unsigned int start, + unsigned int end) +{ + *cmd++ = CP_REG(start); /* h/w regs, start addr */ + *cmd++ = end - start + 1; /* count */ + return cmd; +} + +static inline void calc_gmemsize(struct gmem_shadow_t *shadow, int gmem_size) +{ + int w = 64, h = 64; + + shadow->format = COLORX_8_8_8_8; + + /* convert from bytes to 32-bit words */ + gmem_size = (gmem_size + 3) / 4; + + while ((w * h) < gmem_size) { + if (w < h) + w *= 2; + else + h *= 2; + } + + shadow->pitch = shadow->width = w; + shadow->height = h; + shadow->gmem_pitch = shadow->pitch; + shadow->size = shadow->pitch * shadow->height * 4; +} + #endif /* __ADRENO_DRAWCTXT_H */ diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h index 246315b6..8aea58c9 100644 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -1,193 +1,193 @@ /* Copyright 
(c) 2002,2007-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __ADRENO_PM4TYPES_H #define __ADRENO_PM4TYPES_H -#define PM4_PKT_MASK 0xc0000000 +#define CP_PKT_MASK 0xc0000000 -#define PM4_TYPE0_PKT ((unsigned int)0 << 30) -#define PM4_TYPE1_PKT ((unsigned int)1 << 30) -#define PM4_TYPE2_PKT ((unsigned int)2 << 30) -#define PM4_TYPE3_PKT ((unsigned int)3 << 30) +#define CP_TYPE0_PKT ((unsigned int)0 << 30) +#define CP_TYPE1_PKT ((unsigned int)1 << 30) +#define CP_TYPE2_PKT ((unsigned int)2 << 30) +#define CP_TYPE3_PKT ((unsigned int)3 << 30) /* type3 packets */ /* initialize CP's micro-engine */ -#define PM4_ME_INIT 0x48 +#define CP_ME_INIT 0x48 /* skip N 32-bit words to get to the next packet */ -#define PM4_NOP 0x10 +#define CP_NOP 0x10 /* indirect buffer dispatch. prefetch parser uses this packet type to determine * whether to pre-fetch the IB */ -#define PM4_INDIRECT_BUFFER 0x3f +#define CP_INDIRECT_BUFFER 0x3f /* indirect buffer dispatch. 
same as IB, but init is pipelined */ -#define PM4_INDIRECT_BUFFER_PFD 0x37 +#define CP_INDIRECT_BUFFER_PFD 0x37 /* wait for the IDLE state of the engine */ -#define PM4_WAIT_FOR_IDLE 0x26 +#define CP_WAIT_FOR_IDLE 0x26 /* wait until a register or memory location is a specific value */ -#define PM4_WAIT_REG_MEM 0x3c +#define CP_WAIT_REG_MEM 0x3c /* wait until a register location is equal to a specific value */ -#define PM4_WAIT_REG_EQ 0x52 +#define CP_WAIT_REG_EQ 0x52 /* wait until a register location is >= a specific value */ -#define PM4_WAT_REG_GTE 0x53 +#define CP_WAT_REG_GTE 0x53 /* wait until a read completes */ -#define PM4_WAIT_UNTIL_READ 0x5c +#define CP_WAIT_UNTIL_READ 0x5c /* wait until all base/size writes from an IB_PFD packet have completed */ -#define PM4_WAIT_IB_PFD_COMPLETE 0x5d +#define CP_WAIT_IB_PFD_COMPLETE 0x5d /* register read/modify/write */ -#define PM4_REG_RMW 0x21 +#define CP_REG_RMW 0x21 /* reads register in chip and writes to memory */ -#define PM4_REG_TO_MEM 0x3e +#define CP_REG_TO_MEM 0x3e /* write N 32-bit words to memory */ -#define PM4_MEM_WRITE 0x3d +#define CP_MEM_WRITE 0x3d /* write CP_PROG_COUNTER value to memory */ -#define PM4_MEM_WRITE_CNTR 0x4f +#define CP_MEM_WRITE_CNTR 0x4f /* conditional execution of a sequence of packets */ -#define PM4_COND_EXEC 0x44 +#define CP_COND_EXEC 0x44 /* conditional write to memory or register */ -#define PM4_COND_WRITE 0x45 +#define CP_COND_WRITE 0x45 /* generate an event that creates a write to memory when completed */ -#define PM4_EVENT_WRITE 0x46 +#define CP_EVENT_WRITE 0x46 /* generate a VS|PS_done event */ -#define PM4_EVENT_WRITE_SHD 0x58 +#define CP_EVENT_WRITE_SHD 0x58 /* generate a cache flush done event */ -#define PM4_EVENT_WRITE_CFL 0x59 +#define CP_EVENT_WRITE_CFL 0x59 /* generate a z_pass done event */ -#define PM4_EVENT_WRITE_ZPD 0x5b +#define CP_EVENT_WRITE_ZPD 0x5b /* initiate fetch of index buffer and draw */ -#define PM4_DRAW_INDX 0x22 +#define CP_DRAW_INDX 0x22 /* draw using supplied indices in packet */ -#define PM4_DRAW_INDX_2 0x36 +#define CP_DRAW_INDX_2 0x36 /* initiate fetch of index buffer and binIDs and draw */ -#define PM4_DRAW_INDX_BIN 0x34 +#define CP_DRAW_INDX_BIN 0x34 /* initiate fetch of bin IDs and draw using supplied indices */ -#define PM4_DRAW_INDX_2_BIN 0x35 +#define CP_DRAW_INDX_2_BIN 0x35 /* begin/end initiator for viz query extent processing */ -#define PM4_VIZ_QUERY 0x23 +#define CP_VIZ_QUERY 0x23 /* fetch state sub-blocks and initiate shader code DMAs */ -#define PM4_SET_STATE 0x25 +#define CP_SET_STATE 0x25 /* load constant into chip and to memory */ -#define PM4_SET_CONSTANT 0x2d +#define CP_SET_CONSTANT 0x2d /* load sequencer instruction memory (pointer-based) */ -#define PM4_IM_LOAD 0x27 +#define CP_IM_LOAD 0x27 /* load sequencer instruction memory (code embedded in packet) */ -#define PM4_IM_LOAD_IMMEDIATE 0x2b +#define CP_IM_LOAD_IMMEDIATE 0x2b /* load constants from a location in memory */ -#define PM4_LOAD_CONSTANT_CONTEXT 0x2e +#define CP_LOAD_CONSTANT_CONTEXT 0x2e /* selective invalidation of state pointers */ -#define PM4_INVALIDATE_STATE 0x3b +#define CP_INVALIDATE_STATE 0x3b /* dynamically changes shader instruction memory partition */ -#define PM4_SET_SHADER_BASES 0x4A +#define CP_SET_SHADER_BASES 0x4A /* sets the 64-bit BIN_MASK register in the PFP */ -#define PM4_SET_BIN_MASK 0x50 +#define CP_SET_BIN_MASK 0x50 /* sets the 64-bit BIN_SELECT register in the PFP */ -#define PM4_SET_BIN_SELECT 0x51 +#define CP_SET_BIN_SELECT 0x51 /* updates the current context, if 
needed */ -#define PM4_CONTEXT_UPDATE 0x5e +#define CP_CONTEXT_UPDATE 0x5e /* generate interrupt from the command stream */ -#define PM4_INTERRUPT 0x40 +#define CP_INTERRUPT 0x40 /* copy sequencer instruction memory to system memory */ -#define PM4_IM_STORE 0x2c +#define CP_IM_STORE 0x2c -/* program an offset that will added to the BIN_BASE value of - * the 3D_DRAW_INDX_BIN packet */ -#define PM4_SET_BIN_BASE_OFFSET 0x4B +/* + * for a20x + * program an offset that will added to the BIN_BASE value of + * the 3D_DRAW_INDX_BIN packet + */ +#define CP_SET_BIN_BASE_OFFSET 0x4B -#define PM4_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ +/* + * for a22x + * sets draw initiator flags register in PFP, gets bitwise-ORed into + * every draw initiator + */ +#define CP_SET_DRAW_INIT_FLAGS 0x4B + +#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ /* packet header building macros */ -#define pm4_type0_packet(regindx, cnt) \ - (PM4_TYPE0_PKT | (((cnt)-1) << 16) | ((regindx) & 0x7FFF)) +#define cp_type0_packet(regindx, cnt) \ + (CP_TYPE0_PKT | (((cnt)-1) << 16) | ((regindx) & 0x7FFF)) -#define pm4_type0_packet_for_sameregister(regindx, cnt) \ - ((PM4_TYPE0_PKT | (((cnt)-1) << 16) | ((1 << 15) | \ +#define cp_type0_packet_for_sameregister(regindx, cnt) \ + ((CP_TYPE0_PKT | (((cnt)-1) << 16) | ((1 << 15) | \ ((regindx) & 0x7FFF))) -#define pm4_type1_packet(reg0, reg1) \ - (PM4_TYPE1_PKT | ((reg1) << 12) | (reg0)) +#define cp_type1_packet(reg0, reg1) \ + (CP_TYPE1_PKT | ((reg1) << 12) | (reg0)) -#define pm4_type3_packet(opcode, cnt) \ - (PM4_TYPE3_PKT | (((cnt)-1) << 16) | (((opcode) & 0xFF) << 8)) +#define cp_type3_packet(opcode, cnt) \ + (CP_TYPE3_PKT | (((cnt)-1) << 16) | (((opcode) & 0xFF) << 8)) -#define pm4_predicated_type3_packet(opcode, cnt) \ - (PM4_TYPE3_PKT | (((cnt)-1) << 16) | (((opcode) & 0xFF) << 8) | 0x1) +#define cp_predicated_type3_packet(opcode, cnt) \ + (CP_TYPE3_PKT | (((cnt)-1) << 16) | (((opcode) & 0xFF) << 8) | 0x1) -#define pm4_nop_packet(cnt) \ - (PM4_TYPE3_PKT | (((cnt)-1) << 16) | (PM4_NOP << 8)) +#define cp_nop_packet(cnt) \ + (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8)) /* packet headers */ -#define PM4_HDR_ME_INIT pm4_type3_packet(PM4_ME_INIT, 18) -#define PM4_HDR_INDIRECT_BUFFER_PFD pm4_type3_packet(PM4_INDIRECT_BUFFER_PFD, 2) -#define PM4_HDR_INDIRECT_BUFFER pm4_type3_packet(PM4_INDIRECT_BUFFER, 2) +#define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18) +#define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) +#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2) + +/* dword base address of the GFX decode space */ +#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) + +/* gmem command buffer length */ +#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) #endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c index 4911e937..3d957f69 100644 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -22,9 +22,10 @@ #include "adreno_debugfs.h" #include "kgsl_cffdump.h" -#include "a200_reg.h" +#include "a2xx_reg.h" #define INVALID_RB_CMD 0xaaaaaaaa +#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100 struct pm_id_name { uint32_t id; @@ -43,28 +44,28 @@ static const struct pm_id_name pm0_types[] = { }; static const struct pm_id_name pm3_types[] = { - {PM4_COND_EXEC, "CND_EXEC"}, - {PM4_CONTEXT_UPDATE, "CX__UPDT"}, - {PM4_DRAW_INDX, "DRW_NDX_"}, - {PM4_DRAW_INDX_BIN, "DRW_NDXB"}, - 
{PM4_EVENT_WRITE, "EVENT_WT"}, - {PM4_IM_LOAD, "IN__LOAD"}, - {PM4_IM_LOAD_IMMEDIATE, "IM_LOADI"}, - {PM4_IM_STORE, "IM_STORE"}, - {PM4_INDIRECT_BUFFER, "IND_BUF_"}, - {PM4_INDIRECT_BUFFER_PFD, "IND_BUFP"}, - {PM4_INTERRUPT, "PM4_INTR"}, - {PM4_INVALIDATE_STATE, "INV_STAT"}, - {PM4_LOAD_CONSTANT_CONTEXT, "LD_CN_CX"}, - {PM4_ME_INIT, "ME__INIT"}, - {PM4_NOP, "PM4__NOP"}, - {PM4_REG_RMW, "REG__RMW"}, - {PM4_REG_TO_MEM, "REG2_MEM"}, - {PM4_SET_BIN_BASE_OFFSET, "ST_BIN_O"}, - {PM4_SET_CONSTANT, "ST_CONST"}, - {PM4_SET_PROTECTED_MODE, "ST_PRT_M"}, - {PM4_SET_SHADER_BASES, "ST_SHD_B"}, - {PM4_WAIT_FOR_IDLE, "WAIT4IDL"}, + {CP_COND_EXEC, "CND_EXEC"}, + {CP_CONTEXT_UPDATE, "CX__UPDT"}, + {CP_DRAW_INDX, "DRW_NDX_"}, + {CP_DRAW_INDX_BIN, "DRW_NDXB"}, + {CP_EVENT_WRITE, "EVENT_WT"}, + {CP_IM_LOAD, "IN__LOAD"}, + {CP_IM_LOAD_IMMEDIATE, "IM_LOADI"}, + {CP_IM_STORE, "IM_STORE"}, + {CP_INDIRECT_BUFFER, "IND_BUF_"}, + {CP_INDIRECT_BUFFER_PFD, "IND_BUFP"}, + {CP_INTERRUPT, "PM4_INTR"}, + {CP_INVALIDATE_STATE, "INV_STAT"}, + {CP_LOAD_CONSTANT_CONTEXT, "LD_CN_CX"}, + {CP_ME_INIT, "ME__INIT"}, + {CP_NOP, "PM4__NOP"}, + {CP_REG_RMW, "REG__RMW"}, + {CP_REG_TO_MEM, "REG2_MEM"}, + {CP_SET_BIN_BASE_OFFSET, "ST_BIN_O"}, + {CP_SET_CONSTANT, "ST_CONST"}, + {CP_SET_PROTECTED_MODE, "ST_PRT_M"}, + {CP_SET_SHADER_BASES, "ST_SHD_B"}, + {CP_WAIT_FOR_IDLE, "WAIT4IDL"}, }; /* Offset address pairs: start, end of range to dump (inclusive) */ @@ -174,14 +175,14 @@ static bool adreno_is_pm4_type(uint32_t word) if (adreno_is_pm4_len(word) > 16) return 0; - if ((word & (3<<30)) == PM4_TYPE0_PKT) { + if ((word & (3<<30)) == CP_TYPE0_PKT) { for (i = 0; i < ARRAY_SIZE(pm0_types); ++i) { if ((word & 0x7FFF) == pm0_types[i].id) return 1; } return 0; } - if ((word & (3<<30)) == PM4_TYPE3_PKT) { + if ((word & (3<<30)) == CP_TYPE3_PKT) { for (i = 0; i < ARRAY_SIZE(pm3_types); ++i) { if ((word & 0xFFFF) == (pm3_types[i].id << 8)) return 1; @@ -198,14 +199,14 @@ static const char *adreno_pm4_name(uint32_t word) if (word == INVALID_RB_CMD) return "--------"; - if ((word & (3<<30)) == PM4_TYPE0_PKT) { + if ((word & (3<<30)) == CP_TYPE0_PKT) { for (i = 0; i < ARRAY_SIZE(pm0_types); ++i) { if ((word & 0x7FFF) == pm0_types[i].id) return pm0_types[i].name; } return "????????"; } - if ((word & (3<<30)) == PM4_TYPE3_PKT) { + if ((word & (3<<30)) == CP_TYPE3_PKT) { for (i = 0; i < ARRAY_SIZE(pm3_types); ++i) { if ((word & 0xFFFF) == (pm3_types[i].id << 8)) return pm3_types[i].name; @@ -289,7 +290,7 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, for (i = 0; i+3 < ib1_size; ) { value = ib1_addr[i++]; - if (value == pm4_type3_packet(PM4_INDIRECT_BUFFER_PFD, 2)) { + if (value == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { uint32_t ib2_base = ib1_addr[i++]; uint32_t ib2_size = ib1_addr[i++]; @@ -456,7 +457,7 @@ static int adreno_dump(struct kgsl_device *device) unsigned int r1, r2, r3, rbbm_status; unsigned int cp_ib1_base, cp_ib1_bufsz, cp_stat; unsigned int cp_ib2_base, cp_ib2_bufsz; - unsigned int pt_base; + unsigned int pt_base, cur_pt_base; unsigned int cp_rb_base, rb_count; unsigned int cp_rb_wptr, cp_rb_rptr; unsigned int i; @@ -536,7 +537,12 @@ static int adreno_dump(struct kgsl_device *device) kgsl_regread(device, REG_CP_RB_RPTR_ADDR, &r3); KGSL_LOG_DUMP(device, "CP_RB: BASE = %08X | CNTL = %08X | RPTR_ADDR = %08X" - "\n", cp_rb_base, r2, r3); + " | rb_count = %08X\n", cp_rb_base, r2, r3, rb_count); + { + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + if (rb->sizedwords != rb_count) + rb_count = 
rb->sizedwords; + } kgsl_regread(device, REG_CP_RB_RPTR, &cp_rb_rptr); kgsl_regread(device, REG_CP_RB_WPTR, &cp_rb_wptr); @@ -628,38 +634,37 @@ static int adreno_dump(struct kgsl_device *device) "COHER: SIZE_PM4 = %08X | BASE_PM4 = %08X | STATUS_PM4" " = %08X\n", r1, r2, r3); - kgsl_regread(device, REG_MH_AXI_ERROR, &r1); + kgsl_regread(device, MH_AXI_ERROR, &r1); KGSL_LOG_DUMP(device, "MH: AXI_ERROR = %08X\n", r1); - kgsl_regread(device, REG_MH_MMU_PAGE_FAULT, &r1); - kgsl_regread(device, REG_MH_MMU_CONFIG, &r2); - kgsl_regread(device, REG_MH_MMU_MPU_BASE, &r3); + kgsl_regread(device, MH_MMU_PAGE_FAULT, &r1); + kgsl_regread(device, MH_MMU_CONFIG, &r2); + kgsl_regread(device, MH_MMU_MPU_BASE, &r3); KGSL_LOG_DUMP(device, "MH_MMU: PAGE_FAULT = %08X | CONFIG = %08X | MPU_BASE =" " %08X\n", r1, r2, r3); - kgsl_regread(device, REG_MH_MMU_MPU_END, &r1); - kgsl_regread(device, REG_MH_MMU_VA_RANGE, &r2); - kgsl_regread(device, REG_MH_MMU_PT_BASE, &pt_base); + kgsl_regread(device, MH_MMU_MPU_END, &r1); + kgsl_regread(device, MH_MMU_VA_RANGE, &r2); + pt_base = kgsl_mmu_get_current_ptbase(device); KGSL_LOG_DUMP(device, " MPU_END = %08X | VA_RANGE = %08X | PT_BASE =" " %08X\n", r1, r2, pt_base); + cur_pt_base = pt_base; KGSL_LOG_DUMP(device, "PAGETABLE SIZE: %08X ", KGSL_PAGETABLE_SIZE); - kgsl_regread(device, REG_MH_MMU_TRAN_ERROR, &r1); + kgsl_regread(device, MH_MMU_TRAN_ERROR, &r1); KGSL_LOG_DUMP(device, " TRAN_ERROR = %08X\n", r1); - kgsl_regread(device, REG_MH_INTERRUPT_MASK, &r1); - kgsl_regread(device, REG_MH_INTERRUPT_STATUS, &r2); + kgsl_regread(device, MH_INTERRUPT_MASK, &r1); + kgsl_regread(device, MH_INTERRUPT_STATUS, &r2); KGSL_LOG_DUMP(device, "MH_INTERRUPT: MASK = %08X | STATUS = %08X\n", r1, r2); - if (device->ftbl.device_readtimestamp != NULL) { - ts_processed = device->ftbl.device_readtimestamp( - device, KGSL_TIMESTAMP_RETIRED); - KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed); - } + ts_processed = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed); num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer, cp_rb_rptr); @@ -676,21 +681,21 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "RB: rd_addr:%8.8x rb_size:%d num_item:%d\n", cp_rb_base, rb_count<<2, num_item); - rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, pt_base, - cp_rb_base, &rb_memsize); + rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, + cur_pt_base, cp_rb_base, &rb_memsize); if (!rb_vaddr) { KGSL_LOG_POSTMORTEM_WRITE(device, "Can't fetch vaddr for CP_RB_BASE\n"); goto error_vfree; } - read_idx = (int)cp_rb_rptr - 64; + read_idx = (int)cp_rb_rptr - NUM_DWORDS_OF_RINGBUFFER_HISTORY; if (read_idx < 0) read_idx += rb_count; write_idx = (int)cp_rb_wptr + 16; if (write_idx > rb_count) write_idx -= rb_count; - num_item += 64+16; + num_item += NUM_DWORDS_OF_RINGBUFFER_HISTORY+16; if (num_item > rb_count) num_item = rb_count; if (write_idx >= read_idx) @@ -706,20 +711,27 @@ static int adreno_dump(struct kgsl_device *device) i = 0; for (read_idx = 0; read_idx < num_item; ) { uint32_t this_cmd = rb_copy[read_idx++]; - if (this_cmd == pm4_type3_packet(PM4_INDIRECT_BUFFER_PFD, 2)) { + if (this_cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { uint32_t ib_addr = rb_copy[read_idx++]; uint32_t ib_size = rb_copy[read_idx++]; - dump_ib1(device, pt_base, (read_idx-3)<<2, ib_addr, + dump_ib1(device, cur_pt_base, (read_idx-3)<<2, ib_addr, ib_size, &ib_list, 0); for (; i < ib_list.count; 
++i) - dump_ib(device, "IB2:", pt_base, + dump_ib(device, "IB2:", cur_pt_base, ib_list.offsets[i], ib_list.bases[i], ib_list.sizes[i], 0); + } else if (this_cmd == cp_type0_packet(MH_MMU_PT_BASE, 1)) { + /* Set cur_pt_base to the new pagetable base */ + cur_pt_base = rb_copy[read_idx++]; } } - read_idx = (int)cp_rb_rptr - 64; + /* Restore cur_pt_base back to the pt_base of + the process in whose context the GPU hung */ + cur_pt_base = pt_base; + + read_idx = (int)cp_rb_rptr - NUM_DWORDS_OF_RINGBUFFER_HISTORY; if (read_idx < 0) read_idx += rb_count; KGSL_LOG_DUMP(device, @@ -728,30 +740,31 @@ static int adreno_dump(struct kgsl_device *device) adreno_dump_rb(device, rb_copy, num_item<<2, read_idx, rb_count); if (adreno_ib_dump_enabled()) { - for (read_idx = 64; read_idx >= 0; --read_idx) { + for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY; + read_idx >= 0; --read_idx) { uint32_t this_cmd = rb_copy[read_idx]; - if (this_cmd == pm4_type3_packet( - PM4_INDIRECT_BUFFER_PFD, 2)) { + if (this_cmd == cp_type3_packet( + CP_INDIRECT_BUFFER_PFD, 2)) { uint32_t ib_addr = rb_copy[read_idx+1]; uint32_t ib_size = rb_copy[read_idx+2]; - if (cp_ib1_bufsz && cp_ib1_base == ib_addr) { + if (ib_size && cp_ib1_base == ib_addr) { KGSL_LOG_DUMP(device, "IB1: base:%8.8X " "count:%d\n", ib_addr, ib_size); - dump_ib(device, "IB1: ", pt_base, + dump_ib(device, "IB1: ", cur_pt_base, read_idx<<2, ib_addr, ib_size, 1); } } } for (i = 0; i < ib_list.count; ++i) { - if (cp_ib2_bufsz && cp_ib2_base == ib_list.bases[i]) { - uint32_t ib_size = ib_list.sizes[i]; - uint32_t ib_offset = ib_list.offsets[i]; + uint32_t ib_size = ib_list.sizes[i]; + uint32_t ib_offset = ib_list.offsets[i]; + if (ib_size && cp_ib2_base == ib_list.bases[i]) { KGSL_LOG_DUMP(device, "IB2: base:%8.8X count:%d\n", cp_ib2_base, ib_size); - dump_ib(device, "IB2: ", pt_base, ib_offset, + dump_ib(device, "IB2: ", cur_pt_base, ib_offset, ib_list.bases[i], ib_size, 1); } } @@ -802,7 +815,7 @@ int adreno_postmortem_dump(struct kgsl_device *device, int manual) } /* Disable the idle timer so we don't get interrupted */ - del_timer(&device->idle_timer); + del_timer_sync(&device->idle_timer); /* Turn off napping to make sure we have the clocks full attention through the following process */ diff --git a/drivers/gpu/msm/adreno_postmortem.h b/drivers/gpu/msm/adreno_postmortem.h index 1a432489..b6778006 100644 --- a/drivers/gpu/msm/adreno_postmortem.h +++ b/drivers/gpu/msm/adreno_postmortem.h @@ -1,29 +1,13 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
* - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 4aaa2c6c..d59057c8 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -16,14 +16,15 @@ #include #include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_ringbuffer.h" -#include "a200_reg.h" +#include "a2xx_reg.h" -#define VALID_STATUS_COUNT_MAX 10 #define GSL_RB_NOP_SIZEDWORDS 2 /* protected mode error checking below register address 0x800 * note: if CP_INTERRUPT packet is used then checking needs @@ -31,17 +32,6 @@ */ #define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 -#define GSL_CP_INT_MASK \ - (CP_INT_CNTL__SW_INT_MASK | \ - CP_INT_CNTL__T0_PACKET_IN_IB_MASK | \ - CP_INT_CNTL__OPCODE_ERROR_MASK | \ - CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK | \ - CP_INT_CNTL__RESERVED_BIT_ERROR_MASK | \ - CP_INT_CNTL__IB_ERROR_MASK | \ - CP_INT_CNTL__IB2_INT_MASK | \ - CP_INT_CNTL__IB1_INT_MASK | \ - CP_INT_CNTL__RB_INT_MASK) - /* Firmware file names * Legacy names must remain but replacing macro names to * match current kgsl model. @@ -52,102 +42,17 @@ #define A200_PM4_FW "yamato_pm4.fw" #define A220_PFP_470_FW "leia_pfp_470.fw" #define A220_PM4_470_FW "leia_pm4_470.fw" - -/* functions */ -void kgsl_cp_intrcallback(struct kgsl_device *device) -{ - unsigned int status = 0, num_reads = 0, master_status = 0; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - - adreno_regread_isr(device, REG_MASTER_INT_SIGNAL, &master_status); - while (!status && (num_reads < VALID_STATUS_COUNT_MAX) && - (master_status & MASTER_INT_SIGNAL__CP_INT_STAT)) { - adreno_regread_isr(device, REG_CP_INT_STATUS, &status); - adreno_regread_isr(device, REG_MASTER_INT_SIGNAL, - &master_status); - num_reads++; - } - if (num_reads > 1) - KGSL_DRV_WARN(device, - "Looped %d times to read REG_CP_INT_STATUS\n", - num_reads); - if (!status) { - if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) { - /* This indicates that we could not read CP_INT_STAT. - * As a precaution just wake up processes so - * they can check their timestamps. 
Since, we - * did not ack any interrupts this interrupt will - * be generated again */ - KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n"); - wake_up_interruptible_all(&device->wait_queue); - } else - KGSL_DRV_WARN(device, "Spurious interrput detected\n"); - return; - } - - if (status & CP_INT_CNTL__RB_INT_MASK) { - /* signal intr completion event */ - unsigned int enableflag = 0; - kgsl_sharedmem_writel(&rb->device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); - wmb(); - KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); - } - - if (status & CP_INT_CNTL__T0_PACKET_IN_IB_MASK) { - KGSL_CMD_CRIT(rb->device, - "ringbuffer TO packet in IB interrupt\n"); - adreno_regwrite_isr(rb->device, REG_CP_INT_CNTL, 0); - } - if (status & CP_INT_CNTL__OPCODE_ERROR_MASK) { - KGSL_CMD_CRIT(rb->device, - "ringbuffer opcode error interrupt\n"); - adreno_regwrite_isr(rb->device, REG_CP_INT_CNTL, 0); - } - if (status & CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK) { - KGSL_CMD_CRIT(rb->device, - "ringbuffer protected mode error interrupt\n"); - adreno_regwrite_isr(rb->device, REG_CP_INT_CNTL, 0); - } - if (status & CP_INT_CNTL__RESERVED_BIT_ERROR_MASK) { - KGSL_CMD_CRIT(rb->device, - "ringbuffer reserved bit error interrupt\n"); - adreno_regwrite_isr(rb->device, REG_CP_INT_CNTL, 0); - } - if (status & CP_INT_CNTL__IB_ERROR_MASK) { - KGSL_CMD_CRIT(rb->device, - "ringbuffer IB error interrupt\n"); - adreno_regwrite_isr(rb->device, REG_CP_INT_CNTL, 0); - } - if (status & CP_INT_CNTL__SW_INT_MASK) - KGSL_CMD_INFO(rb->device, "ringbuffer software interrupt\n"); - - if (status & CP_INT_CNTL__IB2_INT_MASK) - KGSL_CMD_INFO(rb->device, "ringbuffer ib2 interrupt\n"); - - if (status & (~GSL_CP_INT_MASK)) - KGSL_CMD_WARN(rb->device, - "bad bits in REG_CP_INT_STATUS %08x\n", status); - - /* only ack bits we understand */ - status &= GSL_CP_INT_MASK; - adreno_regwrite_isr(device, REG_CP_INT_ACK, status); - - if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) { - KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n"); - wake_up_interruptible_all(&device->wait_queue); - atomic_notifier_call_chain(&(device->ts_notifier_list), - device->id, - NULL); - } -} +#define A225_PFP_FW "a225_pfp.fw" +#define A225_PM4_FW "a225_pm4.fw" static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) { BUG_ON(rb->wptr == 0); + /* Let the pwrscale policy know that new commands have + been submitted. */ + kgsl_pwrscale_busy(rb->device); + /*synchronize memory before informing the hardware of the *new commands. */ @@ -156,7 +61,7 @@ static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) adreno_regwrite(rb->device, REG_CP_RB_WPTR, rb->wptr); } -static int +static void adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, int wptr_ahead) { @@ -173,7 +78,7 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr; - GSL_RB_WRITE(cmds, cmds_gpu, pm4_nop_packet(nopcount)); + GSL_RB_WRITE(cmds, cmds_gpu, cp_nop_packet(nopcount)); /* Make sure that rptr is not 0 before submitting * commands at the end of ringbuffer. 
We do not @@ -197,8 +102,6 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, freecmds = rb->rptr - rb->wptr; } while ((freecmds != 0) && (freecmds <= numcmds)); - - return 0; } @@ -206,7 +109,6 @@ static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, unsigned int numcmds) { unsigned int *ptr = NULL; - int status = 0; BUG_ON(numcmds >= rb->sizedwords); @@ -217,22 +119,20 @@ static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, /* reserve dwords for nop packet */ if ((rb->wptr + numcmds) > (rb->sizedwords - GSL_RB_NOP_SIZEDWORDS)) - status = adreno_ringbuffer_waitspace(rb, numcmds, 1); + adreno_ringbuffer_waitspace(rb, numcmds, 1); } else { /* wptr behind rptr */ if ((rb->wptr + numcmds) >= rb->rptr) - status = adreno_ringbuffer_waitspace(rb, numcmds, 0); + adreno_ringbuffer_waitspace(rb, numcmds, 0); /* check for remaining space */ /* reserve dwords for nop packet */ if ((rb->wptr + numcmds) > (rb->sizedwords - GSL_RB_NOP_SIZEDWORDS)) - status = adreno_ringbuffer_waitspace(rb, numcmds, 1); + adreno_ringbuffer_waitspace(rb, numcmds, 1); } - if (status == 0) { - ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; - rb->wptr += numcmds; - } + ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; + rb->wptr += numcmds; return ptr; } @@ -266,19 +166,15 @@ static int _load_firmware(struct kgsl_device *device, const char *fwfile, static int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const char *fwfile; int i, ret = 0; - if (adreno_is_a220(adreno_dev)) - fwfile = A220_PM4_470_FW; - else - fwfile = A200_PM4_FW; - if (adreno_dev->pm4_fw == NULL) { int len; - unsigned int *ptr; + void *ptr; + + ret = _load_firmware(device, adreno_dev->pm4_fwfile, + &ptr, &len); - ret = _load_firmware(device, fwfile, (void *) &ptr, &len); if (ret) goto err; @@ -309,19 +205,14 @@ err: static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - const char *fwfile; int i, ret = 0; - if (adreno_is_a220(adreno_dev)) - fwfile = A220_PFP_470_FW; - else - fwfile = A200_PFP_FW; - if (adreno_dev->pfp_fw == NULL) { int len; - unsigned int *ptr; + void *ptr; - ret = _load_firmware(device, fwfile, (void *) &ptr, &len); + ret = _load_firmware(device, adreno_dev->pfp_fwfile, + &ptr, &len); if (ret) goto err; @@ -441,7 +332,7 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) cmds = adreno_ringbuffer_allocspace(rb, 19); cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); - GSL_RB_WRITE(cmds, cmds_gpu, PM4_HDR_ME_INIT); + GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT); /* All fields present (bits 9:0) */ GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); /* Disable/Enable Real-Time Stream processing (present but ignored) */ @@ -450,21 +341,21 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); + SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); + SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); + SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); + SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); 
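With adreno_ringbuffer_allocspace()/waitspace() now returning void, the invariant they maintain is worth spelling out: the write pointer may never catch up to the read pointer, and GSL_RB_NOP_SIZEDWORDS dwords are reserved at the end of the ring so a wrapping allocation can be fronted by a NOP packet. A small standalone model of that free-space test (the struct and names are invented for illustration, not kernel code):

    /* Hedged model of the ring free-space rules used above. */
    #include <stdbool.h>

    struct rb_sketch {
        unsigned int rptr;        /* read pointer, in dwords */
        unsigned int wptr;        /* write pointer, in dwords */
        unsigned int sizedwords;  /* total ring size, in dwords */
    };

    #define NOP_SIZEDWORDS 2      /* room reserved for the wrap-around NOP */

    /* true if numcmds dwords can be written right now without waiting */
    static bool rb_has_room(const struct rb_sketch *rb, unsigned int numcmds)
    {
        if (rb->wptr >= rb->rptr) {
            /* writer ahead of reader: must not run into the reserved tail pad */
            return rb->wptr + numcmds <= rb->sizedwords - NOP_SIZEDWORDS;
        }
        /* writer has wrapped and is behind the reader: must not catch up */
        return rb->wptr + numcmds < rb->rptr;
    }

    int main(void)
    {
        struct rb_sketch rb = { .rptr = 100, .wptr = 900, .sizedwords = 1024 };
        return rb_has_room(&rb, 200) ? 0 : 1; /* 900+200 > 1022, so no room */
    }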
GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); + SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); + SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); + SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); GSL_RB_WRITE(cmds, cmds_gpu, - GSL_HAL_SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); + SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); /* Vertex and Pixel Shader Start Addresses in instructions * (3 DWORDS per instruction) */ @@ -489,25 +380,20 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) /* idle device to validate ME INIT */ status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); - adreno_regwrite(rb->device, REG_CP_INT_CNTL, GSL_CP_INT_MASK); if (status == 0) rb->flags |= KGSL_FLAGS_STARTED; return status; } -int adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) { if (rb->flags & KGSL_FLAGS_STARTED) { - adreno_regwrite(rb->device, REG_CP_INT_CNTL, 0); - /* ME_HALT */ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000); rb->flags &= ~KGSL_FLAGS_STARTED; } - - return 0; } int adreno_ringbuffer_init(struct kgsl_device *device) @@ -525,7 +411,8 @@ int adreno_ringbuffer_init(struct kgsl_device *device) rb->sizedwords = KGSL_RB_SIZE >> 2; /* allocate memory for ringbuffer */ - status = kgsl_allocate_contig(&rb->buffer_desc, (rb->sizedwords << 2)); + status = kgsl_allocate_contiguous(&rb->buffer_desc, + (rb->sizedwords << 2)); if (status != 0) { adreno_ringbuffer_close(rb); @@ -535,7 +422,7 @@ int adreno_ringbuffer_init(struct kgsl_device *device) /* allocate memory for polling and timestamps */ /* This really can be at 4 byte alignment boundry but for using MMU * we need to make it at page boundary */ - status = kgsl_allocate_contig(&rb->memptrs_desc, + status = kgsl_allocate_contiguous(&rb->memptrs_desc, sizeof(struct kgsl_rbmemptrs)); if (status != 0) { @@ -549,7 +436,7 @@ int adreno_ringbuffer_init(struct kgsl_device *device) return 0; } -int adreno_ringbuffer_close(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_close(struct adreno_ringbuffer *rb) { struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); @@ -563,8 +450,6 @@ int adreno_ringbuffer_close(struct adreno_ringbuffer *rb) adreno_dev->pm4_fw = NULL; memset(rb, 0, sizeof(struct adreno_ringbuffer)); - - return 0; } static uint32_t @@ -590,13 +475,13 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + sizeof(uint)*(rb->wptr-total_sizedwords); if (!(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD)) { - GSL_RB_WRITE(ringcmds, rcmd_gpu, pm4_nop_packet(1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER); } if (flags & KGSL_CMD_FLAGS_PMODE) { /* disable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, - pm4_type3_packet(PM4_SET_PROTECTED_MODE, 1)); + cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); } @@ -608,7 +493,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, if (flags & KGSL_CMD_FLAGS_PMODE) { /* re-enable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, - pm4_type3_packet(PM4_SET_PROTECTED_MODE, 1)); + cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } @@ -616,9 +501,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, timestamp = rb->timestamp; /* start-of-pipeline and 
end-of-pipeline timestamps */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, pm4_type0_packet(REG_CP_TIMESTAMP, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); - GSL_RB_WRITE(ringcmds, rcmd_gpu, pm4_type3_packet(PM4_EVENT_WRITE, 3)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + @@ -628,7 +513,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, - pm4_type3_packet(PM4_COND_EXEC, 4)); + cp_type3_packet(CP_COND_EXEC, 4)); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + @@ -637,7 +522,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, - pm4_type3_packet(PM4_INTERRUPT, 1)); + cp_type3_packet(CP_INTERRUPT, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } @@ -701,13 +586,13 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); - *cmds++ = PM4_HDR_INDIRECT_BUFFER_PFD; + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; } kgsl_setstate(device, - kgsl_pt_get_flags(device->mmu.hwpagetable, + kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); adreno_drawctxt_switch(adreno_dev, drawctxt, flags); @@ -751,13 +636,8 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, GSL_RB_GET_READPTR(rb, &rb->rptr); -/* drewis: still not sure where this struct was changed */ -#if 0 retired_timestamp = device->ftbl->readtimestamp(device, KGSL_TIMESTAMP_RETIRED); -#endif - retired_timestamp = device->ftbl.device_readtimestamp( - device, KGSL_TIMESTAMP_RETIRED); KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n", retired_timestamp); /* @@ -786,9 +666,9 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, kgsl_sharedmem_readl(&rb->buffer_desc, &val3, rb_rptr); /* match the pattern found at the end of a command */ if ((val1 == 2 && - val2 == pm4_type3_packet(PM4_INTERRUPT, 1) + val2 == cp_type3_packet(CP_INTERRUPT, 1) && val3 == CP_INT_CNTL__RB_INT_MASK) || - (val1 == pm4_type3_packet(PM4_EVENT_WRITE, 3) + (val1 == cp_type3_packet(CP_EVENT_WRITE, 3) && val2 == CACHE_FLUSH_TS && val3 == (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)))) { @@ -830,7 +710,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, kgsl_sharedmem_readl(&rb->buffer_desc, &val2, adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size)); - if (val1 == pm4_nop_packet(1) && val2 == KGSL_CMD_IDENTIFIER) { + if (val1 == cp_nop_packet(1) && val2 == KGSL_CMD_IDENTIFIER) { KGSL_DRV_ERR(device, "GPU recovery from hang not possible because " "of hang in kgsl command\n"); @@ -850,7 +730,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); - BUG_ON(value != pm4_type3_packet(PM4_MEM_WRITE, 2)); + BUG_ON(value != cp_type3_packet(CP_MEM_WRITE, 2)); kgsl_sharedmem_readl(&rb->buffer_desc, 
&val1, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); @@ -873,14 +753,14 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, * commands can be executed */ if (value != cur_context) { copy_rb_contents = 1; - temp_rb_buffer[temp_idx++] = pm4_nop_packet(1); + temp_rb_buffer[temp_idx++] = cp_nop_packet(1); temp_rb_buffer[temp_idx++] = KGSL_CMD_IDENTIFIER; - temp_rb_buffer[temp_idx++] = pm4_nop_packet(1); + temp_rb_buffer[temp_idx++] = cp_nop_packet(1); temp_rb_buffer[temp_idx++] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; temp_rb_buffer[temp_idx++] = - pm4_type3_packet(PM4_MEM_WRITE, 2); + cp_type3_packet(CP_MEM_WRITE, 2); temp_rb_buffer[temp_idx++] = val1; temp_rb_buffer[temp_idx++] = value; } else { diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index 9162dea2..3e7a6880 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,29 +1,14 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
* */ #ifndef __ADRENO_RINGBUFFER_H @@ -77,9 +62,6 @@ struct adreno_ringbuffer { uint32_t timestamp; }; -/* dword base address of the GFX decode space */ -#define GSL_HAL_SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) - #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ writel_relaxed(data, ring); \ @@ -135,9 +117,9 @@ int adreno_ringbuffer_init(struct kgsl_device *device); int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram); -int adreno_ringbuffer_stop(struct adreno_ringbuffer *rb); +void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb); -int adreno_ringbuffer_close(struct adreno_ringbuffer *rb); +void adreno_ringbuffer_close(struct adreno_ringbuffer *rb); void adreno_ringbuffer_issuecmds(struct kgsl_device *device, unsigned int flags, diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 2732ffeb..e21ca09c 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,4 +1,5 @@ /* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,14 +29,78 @@ #include "kgsl.h" #include "kgsl_debugfs.h" #include "kgsl_cffdump.h" +#include "kgsl_log.h" +#include "kgsl_sharedmem.h" +#include "kgsl_device.h" #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "kgsl." static int kgsl_pagetable_count = KGSL_PAGETABLE_COUNT; +static char *ksgl_mmu_type; module_param_named(ptcount, kgsl_pagetable_count, int, 0); MODULE_PARM_DESC(kgsl_pagetable_count, "Minimum number of pagetables for KGSL to allocate at initialization time"); +module_param_named(mmutype, ksgl_mmu_type, charp, 0); +MODULE_PARM_DESC(ksgl_mmu_type, +"Type of MMU to be used for graphics. 
Valid values are 'iommu' or 'gpummu' or 'nommu'"); + +#ifdef CONFIG_GENLOCK + +/** + * kgsl_add_event - Add a new timstamp event for the KGSL device + * @device - KGSL device for the new event + * @ts - the timestamp to trigger the event on + * @cb - callback function to call when the timestamp expires + * @priv - private data for the specific event type + * + * @returns - 0 on success or error code on failure + */ + +static int kgsl_add_event(struct kgsl_device *device, u32 ts, + void (*cb)(struct kgsl_device *, void *, u32), void *priv) +{ + struct kgsl_event *event; + struct list_head *n; + unsigned int cur = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + + if (cb == NULL) + return -EINVAL; + + /* Check to see if the requested timestamp has already fired */ + + if (timestamp_cmp(cur, ts) >= 0) { + cb(device, priv, cur); + return 0; + } + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + event->timestamp = ts; + event->priv = priv; + event->func = cb; + + /* Add the event in order to the list */ + + for (n = device->events.next ; n != &device->events; n = n->next) { + struct kgsl_event *e = + list_entry(n, struct kgsl_event, list); + + if (timestamp_cmp(e->timestamp, ts) > 0) { + list_add(&event->list, n->prev); + break; + } + } + + if (n == &device->events) + list_add_tail(&event->list, &device->events); + + return 0; +} +#endif static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) @@ -173,36 +239,41 @@ static void kgsl_memqueue_freememontimestamp(struct kgsl_device *device, list_add_tail(&entry->list, &device->memqueue); } -static void kgsl_memqueue_drain(struct kgsl_device *device) +static void kgsl_timestamp_expired(struct work_struct *work) { + struct kgsl_device *device = container_of(work, struct kgsl_device, + ts_expired_ws); struct kgsl_mem_entry *entry, *entry_tmp; + struct kgsl_event *event, *event_tmp; uint32_t ts_processed; - BUG_ON(!mutex_is_locked(&device->mutex)); + mutex_lock(&device->mutex); /* get current EOP timestamp */ - ts_processed = device->ftbl.device_readtimestamp( - device, - KGSL_TIMESTAMP_RETIRED); + ts_processed = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + /* Flush the freememontimestamp queue */ list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - KGSL_MEM_INFO(device, - "ts_processed %d ts_free %d gpuaddr %x)\n", - ts_processed, entry->free_timestamp, - entry->memdesc.gpuaddr); - if (!timestamp_cmp(ts_processed, entry->free_timestamp)) + if (timestamp_cmp(ts_processed, entry->free_timestamp) < 0) break; list_del(&entry->list); kgsl_mem_entry_put(entry); } -} -static void kgsl_memqueue_drain_unlocked(struct kgsl_device *device) -{ - mutex_lock(&device->mutex); - kgsl_check_suspended(device); - kgsl_memqueue_drain(device); + /* Process expired events */ + list_for_each_entry_safe(event, event_tmp, &device->events, list) { + if (timestamp_cmp(ts_processed, event->timestamp) < 0) + break; + + if (event->func) + event->func(device, event->priv, ts_processed); + + list_del(&event->list); + kfree(event); + } + mutex_unlock(&device->mutex); } @@ -280,43 +351,19 @@ EXPORT_SYMBOL(kgsl_unregister_ts_notifier); int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp) { unsigned int ts_processed; - BUG_ON(device->ftbl.device_readtimestamp == NULL); - ts_processed = device->ftbl.device_readtimestamp( - device, KGSL_TIMESTAMP_RETIRED); + ts_processed = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); - return 
timestamp_cmp(ts_processed, timestamp); + return (timestamp_cmp(ts_processed, timestamp) >= 0); } EXPORT_SYMBOL(kgsl_check_timestamp); -int kgsl_setstate(struct kgsl_device *device, uint32_t flags) -{ - int status = -ENXIO; - - if (flags && device->ftbl.device_setstate) { - status = device->ftbl.device_setstate(device, flags); - } else - status = 0; - - return status; -} -EXPORT_SYMBOL(kgsl_setstate); - -int kgsl_idle(struct kgsl_device *device, unsigned int timeout) -{ - int status = -ENXIO; - - if (device->ftbl.device_idle) - status = device->ftbl.device_idle(device, timeout); - - return status; -} -EXPORT_SYMBOL(kgsl_idle); - static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) { int status = -EINVAL; unsigned int nap_allowed_saved; + struct kgsl_pwrscale_policy *policy_saved; if (!device) return -EINVAL; @@ -326,6 +373,8 @@ static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) mutex_lock(&device->mutex); nap_allowed_saved = device->pwrctrl.nap_allowed; device->pwrctrl.nap_allowed = false; + policy_saved = device->pwrscale.policy; + device->pwrscale.policy = NULL; device->requested_state = KGSL_STATE_SUSPEND; /* Make sure no user process is waiting for a timestamp * * before supending */ @@ -335,19 +384,19 @@ static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) mutex_lock(&device->mutex); } /* Don't let the timer wake us during suspended sleep. */ - del_timer(&device->idle_timer); + del_timer_sync(&device->idle_timer); switch (device->state) { case KGSL_STATE_INIT: break; case KGSL_STATE_ACTIVE: /* Wait for the device to become idle */ - device->ftbl.device_idle(device, KGSL_TIMEOUT_DEFAULT); + device->ftbl->idle(device, KGSL_TIMEOUT_DEFAULT); case KGSL_STATE_NAP: case KGSL_STATE_SLEEP: /* Get the completion ready to be waited upon. 
*/ INIT_COMPLETION(device->hwaccess_gate); - device->ftbl.device_suspend_context(device); - device->ftbl.device_stop(device); + device->ftbl->suspend_context(device); + device->ftbl->stop(device); device->state = KGSL_STATE_SUSPEND; KGSL_PWR_WARN(device, "state -> SUSPEND, device %d\n", device->id); @@ -359,6 +408,7 @@ static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) } device->requested_state = KGSL_STATE_NONE; device->pwrctrl.nap_allowed = nap_allowed_saved; + device->pwrscale.policy = policy_saved; status = 0; end: @@ -378,7 +428,8 @@ static int kgsl_resume_device(struct kgsl_device *device) mutex_lock(&device->mutex); if (device->state == KGSL_STATE_SUSPEND) { device->requested_state = KGSL_STATE_ACTIVE; - status = device->ftbl.device_start(device, 0); + kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_NOMINAL); + status = device->ftbl->start(device, 0); if (status == 0) { device->state = KGSL_STATE_ACTIVE; KGSL_PWR_WARN(device, @@ -391,13 +442,13 @@ static int kgsl_resume_device(struct kgsl_device *device) device->state = KGSL_STATE_INIT; goto end; } - status = device->ftbl.device_resume_context(device); complete_all(&device->hwaccess_gate); } device->requested_state = KGSL_STATE_NONE; end: mutex_unlock(&device->mutex); + kgsl_check_idle(device); KGSL_PWR_WARN(device, "resume end\n"); return status; } @@ -434,6 +485,16 @@ const struct dev_pm_ops kgsl_pm_ops = { }; EXPORT_SYMBOL(kgsl_pm_ops); +void kgsl_early_suspend_driver(struct early_suspend *h) +{ + struct kgsl_device *device = container_of(h, + struct kgsl_device, display_off); + mutex_lock(&device->mutex); + kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_NOMINAL); + mutex_unlock(&device->mutex); +} +EXPORT_SYMBOL(kgsl_early_suspend_driver); + int kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state) { @@ -449,6 +510,16 @@ int kgsl_resume_driver(struct platform_device *pdev) } EXPORT_SYMBOL(kgsl_resume_driver); +void kgsl_late_resume_driver(struct early_suspend *h) +{ + struct kgsl_device *device = container_of(h, + struct kgsl_device, display_off); + mutex_lock(&device->mutex); + kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO); + mutex_unlock(&device->mutex); +} +EXPORT_SYMBOL(kgsl_late_resume_driver); + /* file operations */ static struct kgsl_process_private * kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) @@ -477,15 +548,11 @@ kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) INIT_LIST_HEAD(&private->mem_list); -#ifdef CONFIG_MSM_KGSL_MMU + if (kgsl_mmu_enabled()) { unsigned long pt_name; -#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE pt_name = task_tgid_nr(current); -#else - pt_name = KGSL_MMU_GLOBAL_PT; -#endif private->pagetable = kgsl_mmu_getpagetable(pt_name); if (private->pagetable == NULL) { kfree(private); @@ -493,7 +560,6 @@ kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) goto out; } } -#endif list_add(&private->list, &kgsl_driver.process_list); @@ -559,7 +625,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) break; if (context->dev_priv == dev_priv) { - device->ftbl.device_drawctxt_destroy(device, context); + device->ftbl->drawctxt_destroy(device, context); kgsl_destroy_context(dev_priv, context); } @@ -568,7 +634,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) device->open_count--; if (device->open_count == 0) { - result = device->ftbl.device_stop(device); + result = device->ftbl->stop(device); device->state = KGSL_STATE_INIT; KGSL_PWR_WARN(device, "state -> 
INIT, device %d\n", device->id); } @@ -602,7 +668,6 @@ static int kgsl_open(struct inode *inodep, struct file *filep) } result = pm_runtime_get_sync(device->parentdev); - result = 0; if (result < 0) { KGSL_DRV_ERR(device, "Runtime PM: Unable to wake up the device, rc = %d\n", @@ -633,7 +698,7 @@ static int kgsl_open(struct inode *inodep, struct file *filep) kgsl_check_suspended(device); if (device->open_count == 0) { - result = device->ftbl.device_start(device, true); + result = device->ftbl->start(device, true); if (result) { mutex_unlock(&device->mutex); @@ -648,7 +713,7 @@ static int kgsl_open(struct inode *inodep, struct file *filep) KGSL_DRV_INFO(device, "Initialized %s: mmu=%s pagetable_count=%d\n", device->name, kgsl_mmu_enabled() ? "on" : "off", - KGSL_PAGETABLE_COUNT); + kgsl_pagetable_count); return result; @@ -746,7 +811,7 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, break; } default: - result = dev_priv->device->ftbl.device_getproperty( + result = dev_priv->device->ftbl->getproperty( dev_priv->device, param->type, param->value, param->sizebytes); } @@ -767,16 +832,10 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private dev_priv->device->active_cnt++; - /* Don't wait forever, set a max value for now */ - if (param->timeout == -1) - param->timeout = 10 * MSEC_PER_SEC; - - result = dev_priv->device->ftbl.device_waittimestamp(dev_priv->device, + result = dev_priv->device->ftbl->waittimestamp(dev_priv->device, param->timestamp, param->timeout); - kgsl_memqueue_drain(dev_priv->device); - /* Fire off any pending suspend operations that are in flight */ INIT_COMPLETION(dev_priv->device->suspend_gate); @@ -894,15 +953,7 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, goto free_ibdesc; } - /* Let the pwrscale policy know that a new command buffer - is being issued */ - - kgsl_pwrscale_busy(dev_priv->device); - -/* drewis: don't know what changed this...diff from cherry-pick - f3c1074d1539be20cecbb82f37705bd16058418e */ -/* result = dev_priv->device->ftbl->issueibcmds(dev_priv,*/ - result = dev_priv->device->ftbl.device_issueibcmds(dev_priv, + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, ibdesc, param->numibs, @@ -939,8 +990,8 @@ static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private struct kgsl_cmdstream_readtimestamp *param = data; param->timestamp = - dev_priv->device->ftbl.device_readtimestamp( - dev_priv->device, param->type); + dev_priv->device->ftbl->readtimestamp(dev_priv->device, + param->type); return 0; } @@ -962,7 +1013,6 @@ static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private if (entry) { kgsl_memqueue_freememontimestamp(dev_priv->device, entry, param->timestamp, param->type); - kgsl_memqueue_drain(dev_priv->device); } else { KGSL_DRV_ERR(dev_priv->device, "invalid gpuaddr %08x\n", param->gpuaddr); @@ -986,10 +1036,10 @@ static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, goto done; } - if (dev_priv->device->ftbl.device_drawctxt_create != NULL) - result = dev_priv->device->ftbl.device_drawctxt_create(dev_priv, - param->flags, - context); + if (dev_priv->device->ftbl->drawctxt_create) + result = dev_priv->device->ftbl->drawctxt_create( + dev_priv->device, dev_priv->process_priv->pagetable, + context, param->flags); param->drawctxt_id = context->id; @@ -1014,9 +1064,9 @@ static long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, goto done; } - result = 
dev_priv->device->ftbl.device_drawctxt_destroy( - dev_priv->device, - context); + if (dev_priv->device->ftbl->drawctxt_destroy) + dev_priv->device->ftbl->drawctxt_destroy(dev_priv->device, + context); kgsl_destroy_context(dev_priv, context); @@ -1074,9 +1124,6 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, if (!kgsl_mmu_enabled()) return -ENODEV; - /* Make sure all pending freed memory is collected */ - kgsl_memqueue_drain_unlocked(dev_priv->device); - if (!param->hostptr) { KGSL_CORE_ERR("invalid hostptr %x\n", param->hostptr); result = -EINVAL; @@ -1251,7 +1298,11 @@ static int kgsl_setup_phys_file(struct kgsl_mem_entry *entry, entry->memdesc.size = size; entry->memdesc.physaddr = phys + (offset & PAGE_MASK); entry->memdesc.hostptr = (void *) (virt + (offset & PAGE_MASK)); - entry->memdesc.ops = &kgsl_contig_ops; + + ret = memdesc_sg_phys(&entry->memdesc, + phys + (offset & PAGE_MASK), size); + if (ret) + goto err; return 0; err: @@ -1261,6 +1312,60 @@ err: return ret; } +static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, + void *addr, int size) +{ + int i; + int sglen = PAGE_ALIGN(size) / PAGE_SIZE; + unsigned long paddr = (unsigned long) addr; + + memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), + GFP_KERNEL); + if (memdesc->sg == NULL) + return -ENOMEM; + + memdesc->sglen = sglen; + sg_init_table(memdesc->sg, sglen); + + spin_lock(¤t->mm->page_table_lock); + + for (i = 0; i < sglen; i++, paddr += PAGE_SIZE) { + struct page *page; + pmd_t *ppmd; + pte_t *ppte; + pgd_t *ppgd = pgd_offset(current->mm, paddr); + + if (pgd_none(*ppgd) || pgd_bad(*ppgd)) + goto err; + + ppmd = pmd_offset(ppgd, paddr); + if (pmd_none(*ppmd) || pmd_bad(*ppmd)) + goto err; + + ppte = pte_offset_map(ppmd, paddr); + if (ppte == NULL) + goto err; + + page = pfn_to_page(pte_pfn(*ppte)); + if (!page) + goto err; + + sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); + pte_unmap(ppte); + } + + spin_unlock(¤t->mm->page_table_lock); + + return 0; + +err: + spin_unlock(¤t->mm->page_table_lock); + kfree(memdesc->sg); + memdesc->sg = NULL; + + return -EINVAL; +} + static int kgsl_setup_hostptr(struct kgsl_mem_entry *entry, struct kgsl_pagetable *pagetable, void *hostptr, unsigned int offset, @@ -1310,9 +1415,9 @@ static int kgsl_setup_hostptr(struct kgsl_mem_entry *entry, entry->memdesc.pagetable = pagetable; entry->memdesc.size = size; entry->memdesc.hostptr = hostptr + (offset & PAGE_MASK); - entry->memdesc.ops = &kgsl_userptr_ops; - return 0; + return memdesc_sg_virt(&entry->memdesc, + hostptr + (offset & PAGE_MASK), size); } #ifdef CONFIG_ASHMEM @@ -1360,11 +1465,13 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, } entry->file_ptr = filep; - entry->memdesc.pagetable = pagetable; entry->memdesc.size = ALIGN(size, PAGE_SIZE); entry->memdesc.hostptr = hostptr; - entry->memdesc.ops = &kgsl_userptr_ops; + + ret = memdesc_sg_virt(&entry->memdesc, hostptr, size); + if (ret) + goto err; return 0; @@ -1395,8 +1502,6 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, if (entry == NULL) return -ENOMEM; - kgsl_memqueue_drain_unlocked(dev_priv->device); - if (_IOC_SIZE(cmd) == sizeof(struct kgsl_sharedmem_from_pmem)) memtype = KGSL_USER_MEM_TYPE_PMEM; else @@ -1536,9 +1641,6 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, if (entry == NULL) return -ENOMEM; - /* Make sure all pending freed memory is collected */ - kgsl_memqueue_drain_unlocked(dev_priv->device); - result = kgsl_allocate_user(&entry->memdesc, private->pagetable, 
param->size, param->flags); @@ -1586,6 +1688,114 @@ static long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, return result; } +#ifdef CONFIG_GENLOCK +struct kgsl_genlock_event_priv { + struct genlock_handle *handle; + struct genlock *lock; +}; + +/** + * kgsl_genlock_event_cb - Event callback for a genlock timestamp event + * @device - The KGSL device that expired the timestamp + * @priv - private data for the event + * @timestamp - the timestamp that triggered the event + * + * Release a genlock lock following the expiration of a timestamp + */ + +static void kgsl_genlock_event_cb(struct kgsl_device *device, + void *priv, u32 timestamp) +{ + struct kgsl_genlock_event_priv *ev = priv; + int ret; + + ret = genlock_lock(ev->handle, GENLOCK_UNLOCK, 0, 0); + if (ret) + KGSL_CORE_ERR("Error while unlocking genlock: %d\n", ret); + + genlock_put_handle(ev->handle); + + kfree(ev); +} + +/** + * kgsl_add_genlock_event - Create a new genlock event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - User space buffer containing struct kgsl_genlock_event_priv + * @len - length of the userspace buffer + * @returns 0 on success or error code on failure + * + * Attach to a genlock handle and register an event to release the + * genlock lock when the timestamp expires + */ + +static int kgsl_add_genlock_event(struct kgsl_device *device, + u32 timestamp, void __user *data, int len) +{ + struct kgsl_genlock_event_priv *event; + struct kgsl_timestamp_event_genlock priv; + int ret; + + if (len != sizeof(priv)) + return -EINVAL; + + if (copy_from_user(&priv, data, sizeof(priv))) + return -EFAULT; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + + if (event == NULL) + return -ENOMEM; + + event->handle = genlock_get_handle_fd(priv.handle); + + if (IS_ERR(event->handle)) { + int ret = PTR_ERR(event->handle); + kfree(event); + return ret; + } + + ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + if (ret) + kfree(event); + + return ret; +} +#else +static long kgsl_add_genlock_event(struct kgsl_device *device, + u32 timestamp, void __user *data, int len) +{ + return -EINVAL; +} +#endif + +/** + * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_GENLOCK: + ret = kgsl_add_genlock_event(dev_priv->device, + param->timestamp, param->priv, param->len); + break; + default: + ret = -EINVAL; + } + + return ret; +} + typedef long (*kgsl_ioctl_func_t)(struct kgsl_device_private *, unsigned int, void *); @@ -1627,6 +1837,8 @@ static const struct { kgsl_ioctl_cff_syncmem, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, kgsl_ioctl_cff_user_event, 0), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -1676,7 +1888,13 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) func = kgsl_ioctl_funcs[nr].func; lock = kgsl_ioctl_funcs[nr].lock; } else { - func = dev_priv->device->ftbl.device_ioctl; + func = dev_priv->device->ftbl->ioctl; + if
(!func) { + KGSL_DRV_INFO(dev_priv->device, + "invalid ioctl code %08x\n", cmd); + ret = -EINVAL; + goto done; + } lock = 1; } @@ -1749,7 +1967,7 @@ kgsl_gpumem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct kgsl_mem_entry *entry = vma->vm_private_data; - if (!entry->memdesc.ops->vmfault) + if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault) return VM_FAULT_SIGBUS; return entry->memdesc.ops->vmfault(&entry->memdesc, vma, vmf); @@ -1773,7 +1991,7 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT; struct kgsl_device_private *dev_priv = file->private_data; struct kgsl_process_private *private = dev_priv->process_priv; - struct kgsl_mem_entry *entry; + struct kgsl_mem_entry *tmp, *entry = NULL; struct kgsl_device *device = dev_priv->device; /* Handle leagacy behavior for memstore */ @@ -1784,9 +2002,10 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) /* Find a chunk of GPU memory */ spin_lock(&private->mem_lock); - list_for_each_entry(entry, &private->mem_list, list) { - if (vma_offset == entry->memdesc.gpuaddr) { - kgsl_mem_entry_get(entry); + list_for_each_entry(tmp, &private->mem_list, list) { + if (vma_offset == tmp->memdesc.gpuaddr) { + kgsl_mem_entry_get(tmp); + entry = tmp; break; } } @@ -1795,7 +2014,9 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) if (entry == NULL) return -EINVAL; - if (!entry->memdesc.ops->vmflags || !entry->memdesc.ops->vmfault) + if (!entry->memdesc.ops || + !entry->memdesc.ops->vmflags || + !entry->memdesc.ops->vmfault) return -EINVAL; vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc); @@ -1818,7 +2039,7 @@ static const struct file_operations kgsl_fops = { struct kgsl_driver kgsl_driver = { .process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex), - .pt_mutex = __MUTEX_INITIALIZER(kgsl_driver.pt_mutex), + .ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock), .devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock), }; EXPORT_SYMBOL(kgsl_driver); @@ -1842,6 +2063,8 @@ void kgsl_unregister_device(struct kgsl_device *device) kgsl_pwrctrl_uninit_sysfs(device); wake_lock_destroy(&device->idle_wakelock); + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "kgsl"); + idr_destroy(&device->context_idr); if (device->memstore.hostptr) @@ -1860,8 +2083,6 @@ void kgsl_unregister_device(struct kgsl_device *device) mutex_lock(&kgsl_driver.devlock); kgsl_driver.devp[minor] = NULL; mutex_unlock(&kgsl_driver.devlock); - - atomic_dec(&kgsl_driver.device_count); } EXPORT_SYMBOL(kgsl_unregister_device); @@ -1904,8 +2125,6 @@ kgsl_register_device(struct kgsl_device *device) dev_set_drvdata(device->parentdev, device); /* Generic device initialization */ - atomic_inc(&kgsl_driver.device_count); - init_waitqueue_head(&device->wait_queue); kgsl_cffdump_open(device->id); @@ -1921,22 +2140,25 @@ kgsl_register_device(struct kgsl_device *device) goto err_devlist; INIT_WORK(&device->idle_check_ws, kgsl_idle_check); + INIT_WORK(&device->ts_expired_ws, kgsl_timestamp_expired); INIT_LIST_HEAD(&device->memqueue); + INIT_LIST_HEAD(&device->events); ret = kgsl_mmu_init(device); if (ret != 0) goto err_dest_work_q; - ret = kgsl_allocate_contig(&device->memstore, + ret = kgsl_allocate_contiguous(&device->memstore, sizeof(struct kgsl_devmemstore)); if (ret != 0) goto err_close_mmu; - kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - wake_lock_init(&device->idle_wakelock, WAKE_LOCK_IDLE, device->name); + 
pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "kgsl", + PM_QOS_DEFAULT_VALUE); + idr_init(&device->context_idr); /* sysfs and debugfs initalization - failure here is non fatal */ @@ -2066,17 +2288,19 @@ EXPORT_SYMBOL(kgsl_device_platform_remove); static int __devinit kgsl_ptdata_init(void) { - INIT_LIST_HEAD(&kgsl_driver.pagetable_list); - - return kgsl_ptpool_init(&kgsl_driver.ptpool, KGSL_PAGETABLE_SIZE, - kgsl_pagetable_count); + kgsl_driver.ptpool = kgsl_mmu_ptpool_init(KGSL_PAGETABLE_SIZE, + kgsl_pagetable_count); + if (!kgsl_driver.ptpool) + return -ENOMEM; + return 0; } static void kgsl_core_exit(void) { unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX); - kgsl_ptpool_destroy(&kgsl_driver.ptpool); + kgsl_mmu_ptpool_destroy(&kgsl_driver.ptpool); + kgsl_driver.ptpool = NULL; device_unregister(&kgsl_driver.virtdev); @@ -2094,7 +2318,6 @@ static void kgsl_core_exit(void) static int __init kgsl_core_init(void) { int result = 0; - /* alloc major and minor device numbers */ result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX, KGSL_NAME); @@ -2148,14 +2371,17 @@ static int __init kgsl_core_init(void) kgsl_sharedmem_init_sysfs(); kgsl_cffdump_init(); - /* Generic device initialization */ - atomic_set(&kgsl_driver.device_count, -1); - INIT_LIST_HEAD(&kgsl_driver.process_list); - result = kgsl_ptdata_init(); - if (result) - goto err; + INIT_LIST_HEAD(&kgsl_driver.pagetable_list); + + kgsl_mmu_set_mmutype(ksgl_mmu_type); + + if (KGSL_MMU_TYPE_GPU == kgsl_mmu_get_mmutype()) { + result = kgsl_ptdata_init(); + if (result) + goto err; + } result = kgsl_drm_init(NULL); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index d107a0b0..e26cdc9e 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,29 +1,13 @@ /* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __KGSL_H @@ -38,26 +22,8 @@ #include #include -#include - -#include "kgsl_device.h" -#include "kgsl_pwrctrl.h" -#include "kgsl_sharedmem.h" -#include "kgsl_log.h" -#include "kgsl_cffdump.h" - #define KGSL_NAME "kgsl" -#define CHIP_REV_251 0x020501 - -/* Flags to control whether to flush or invalidate a cached memory range */ -#define KGSL_CACHE_INV 0x00000000 -#define KGSL_CACHE_CLEAN 0x00000001 -#define KGSL_CACHE_FLUSH 0x00000002 - -#define KGSL_CACHE_USER_ADDR 0x00000010 -#define KGSL_CACHE_VMALLOC_ADDR 0x00000020 - /*cache coherency ops */ #define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002 @@ -74,13 +40,9 @@ #define KGSL_PAGETABLE_ENTRIES(_sz) (((_sz) >> PAGE_SHIFT) + \ KGSL_PT_EXTRA_ENTRIES) -#ifdef CONFIG_MSM_KGSL_MMU #define KGSL_PAGETABLE_SIZE \ ALIGN(KGSL_PAGETABLE_ENTRIES(CONFIG_MSM_KGSL_PAGE_TABLE_SIZE) * \ KGSL_PAGETABLE_ENTRY_SIZE, PAGE_SIZE) -#else -#define KGSL_PAGETABLE_SIZE 0 -#endif #ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE #define KGSL_PAGETABLE_COUNT (CONFIG_MSM_KGSL_PAGE_TABLE_COUNT) @@ -99,6 +61,8 @@ KGSL_PAGETABLE_ENTRY_SIZE, PAGE_SIZE) #define KGSL_STATS_ADD(_size, _stat, _max) \ do { _stat += (_size); if (_stat > _max) _max = _stat; } while (0) +struct kgsl_device; + struct kgsl_driver { struct cdev cdev; dev_t major; @@ -108,24 +72,21 @@ struct kgsl_driver { /* Kobjects for storing pagetable and process statistics */ struct kobject *ptkobj; struct kobject *prockobj; - atomic_t device_count; struct kgsl_device *devp[KGSL_DEVICE_MAX]; - uint32_t flags_debug; - /* Global lilst of open processes */ struct list_head process_list; /* Global list of pagetables */ struct list_head pagetable_list; - /* Mutex for accessing the pagetable list */ - struct mutex pt_mutex; + /* Spinlock for accessing the pagetable list */ + spinlock_t ptlock; /* Mutex for accessing the process list */ struct mutex process_mutex; /* Mutex for protecting the device list */ struct mutex devlock; - struct kgsl_ptpool ptpool; + void *ptpool; struct { unsigned int vmalloc; @@ -143,6 +104,22 @@ extern struct kgsl_driver kgsl_driver; #define KGSL_USER_MEMORY 1 #define KGSL_MAPPED_MEMORY 2 +struct kgsl_pagetable; +struct kgsl_memdesc_ops; + +/* shared memory allocation */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int gpuaddr; + unsigned int physaddr; + unsigned int size; + unsigned int priv; + struct scatterlist *sg; + unsigned int sglen; + struct kgsl_memdesc_ops *ops; +}; + struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; @@ -167,53 +144,14 @@ uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); -int kgsl_idle(struct kgsl_device *device, unsigned int timeout); -int kgsl_setstate(struct kgsl_device *device, uint32_t flags); - -static inline void kgsl_regread(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) -{ - device->ftbl.device_regread(device, offsetwords, value); -} - -static inline void kgsl_regwrite(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) -{ - device->ftbl.device_regwrite(device, offsetwords, value); -} - -static inline void 
kgsl_regread_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) -{ - device->ftbl.device_regread_isr(device, offsetwords, value); -} - -static inline void kgsl_regwrite_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) -{ - device->ftbl.device_regwrite_isr(device, offsetwords, value); -} - -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp); - -int kgsl_register_ts_notifier(struct kgsl_device *device, - struct notifier_block *nb); - -int kgsl_unregister_ts_notifier(struct kgsl_device *device, - struct notifier_block *nb); - -int kgsl_device_platform_probe(struct kgsl_device *device, - irqreturn_t (*dev_isr) (int, void*)); -void kgsl_device_platform_remove(struct kgsl_device *device); extern const struct dev_pm_ops kgsl_pm_ops; +struct early_suspend; int kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state); int kgsl_resume_driver(struct platform_device *pdev); +void kgsl_early_suspend_driver(struct early_suspend *h); +void kgsl_late_resume_driver(struct early_suspend *h); #ifdef CONFIG_MSM_KGSL_DRM extern int kgsl_drm_init(struct platform_device *dev); @@ -240,22 +178,14 @@ static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, return 0; } -static inline struct kgsl_device *kgsl_device_from_dev(struct device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->dev == dev) - return kgsl_driver.devp[i]; - } - - return NULL; -} - -static inline bool timestamp_cmp(unsigned int new, unsigned int old) +static inline int timestamp_cmp(unsigned int new, unsigned int old) { int ts_diff = new - old; - return (ts_diff >= 0) || (ts_diff < -20000); + + if (ts_diff == 0) + return 0; + + return ((ts_diff > 0) || (ts_diff < -20000)) ? 
1 : -1; } static inline void @@ -270,21 +200,4 @@ kgsl_mem_entry_put(struct kgsl_mem_entry *entry) kref_put(&entry->refcount, kgsl_mem_entry_destroy); } -static inline int kgsl_create_device_sysfs_files(struct device *root, - struct device_attribute **list) -{ - int ret = 0, i; - for (i = 0; list[i] != NULL; i++) - ret |= device_create_file(root, list[i]); - return ret; -} - -static inline void kgsl_remove_device_sysfs_files(struct device *root, - struct device_attribute **list) -{ - int i; - for (i = 0; list[i] != NULL; i++) - device_remove_file(root, list[i]); -} - #endif /* __KGSL_H */ diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c index 702e1ac7..aa33152c 100644 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "kgsl.h" #include "kgsl_cffdump.h" @@ -362,8 +363,10 @@ void kgsl_cffdump_open(enum kgsl_deviceid device_id) /*TODO: move this to where we can report correct gmemsize*/ unsigned int va_base; - /* XXX: drewis edit: only for 8x50 */ - va_base = 0x20000000; + if (cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930()) + va_base = 0x40000000; + else + va_base = 0x20000000; kgsl_cffdump_memory_base(device_id, va_base, CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, SZ_256K); @@ -523,8 +526,8 @@ static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv, static uint size_stack[ADDRESS_STACK_SIZE]; switch (GET_PM4_TYPE3_OPCODE(hostaddr)) { - case PM4_INDIRECT_BUFFER_PFD: - case PM4_INDIRECT_BUFFER: + case CP_INDIRECT_BUFFER_PFD: + case CP_INDIRECT_BUFFER: { /* traverse indirect buffers */ int i; @@ -607,7 +610,6 @@ bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, if (!memdesc->physaddr) { KGSL_CORE_ERR("no physaddr"); - return true; } else { mb(); kgsl_cache_range_op((struct kgsl_memdesc *)memdesc, diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h index d2f9d172..140e4868 100644 --- a/drivers/gpu/msm/kgsl_cffdump.h +++ b/drivers/gpu/msm/kgsl_cffdump.h @@ -1,29 +1,13 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c index d98586a9..f5eeb3fb 100644 --- a/drivers/gpu/msm/kgsl_debugfs.c +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -14,6 +14,7 @@ #include #include "kgsl.h" +#include "kgsl_device.h" /*default log levels is error for everything*/ #define KGSL_LOG_LEVEL_DEFAULT 3 diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index bb0f2b35..64d369eb 100644 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -1,29 +1,14 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
* */ #ifndef __KGSL_DEVICE_H @@ -31,7 +16,10 @@ #include #include +#include +#include +#include "kgsl.h" #include "kgsl_mmu.h" #include "kgsl_pwrctrl.h" #include "kgsl_log.h" @@ -71,55 +59,48 @@ struct kgsl_context; struct kgsl_power_stats; struct kgsl_functable { - void (*device_regread) (struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value); - void (*device_regwrite) (struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value); - void (*device_regread_isr) (struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value); - void (*device_regwrite_isr) (struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value); - int (*device_setstate) (struct kgsl_device *device, uint32_t flags); - int (*device_idle) (struct kgsl_device *device, unsigned int timeout); - unsigned int (*device_isidle) (struct kgsl_device *device); - int (*device_suspend_context) (struct kgsl_device *device); - int (*device_resume_context) (struct kgsl_device *device); - int (*device_start) (struct kgsl_device *device, unsigned int init_ram); - int (*device_stop) (struct kgsl_device *device); - int (*device_getproperty) (struct kgsl_device *device, - enum kgsl_property_type type, - void *value, - unsigned int sizebytes); - int (*device_waittimestamp) (struct kgsl_device *device, - unsigned int timestamp, - unsigned int msecs); - unsigned int (*device_readtimestamp) ( - struct kgsl_device *device, - enum kgsl_timestamp_type type); - int (*device_issueibcmds) (struct kgsl_device_private *dev_priv, - struct kgsl_context *context, - struct kgsl_ibdesc *ibdesc, - unsigned int sizedwords, - uint32_t *timestamp, - unsigned int flags); - int (*device_drawctxt_create) (struct kgsl_device_private *dev_priv, - uint32_t flags, - struct kgsl_context *context); - int (*device_drawctxt_destroy) (struct kgsl_device *device, - struct kgsl_context *context); - long (*device_ioctl) (struct kgsl_device_private *dev_priv, - unsigned int cmd, void *data); - int (*device_setup_pt)(struct kgsl_device *device, - struct kgsl_pagetable *pagetable); - - int (*device_cleanup_pt)(struct kgsl_device *device, - struct kgsl_pagetable *pagetable); - void (*device_power_stats)(struct kgsl_device *device, + /* Mandatory functions - these functions must be implemented + by the client device. The driver will not check for a NULL + pointer before calling the hook. 
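+ For example, the kgsl_regread(), kgsl_regwrite() and kgsl_idle() helpers defined later in this header dereference ftbl->regread, ftbl->regwrite and ftbl->idle directly, so a device that leaves a mandatory hook NULL will fault on its first use.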
+ */ + void (*regread) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value); + void (*regwrite) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int value); + int (*idle) (struct kgsl_device *device, unsigned int timeout); + unsigned int (*isidle) (struct kgsl_device *device); + int (*suspend_context) (struct kgsl_device *device); + int (*start) (struct kgsl_device *device, unsigned int init_ram); + int (*stop) (struct kgsl_device *device); + int (*getproperty) (struct kgsl_device *device, + enum kgsl_property_type type, void *value, + unsigned int sizebytes); + int (*waittimestamp) (struct kgsl_device *device, + unsigned int timestamp, unsigned int msecs); + unsigned int (*readtimestamp) (struct kgsl_device *device, + enum kgsl_timestamp_type type); + int (*issueibcmds) (struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, + unsigned int sizedwords, uint32_t *timestamp, + unsigned int flags); + int (*setup_pt)(struct kgsl_device *device, + struct kgsl_pagetable *pagetable); + void (*cleanup_pt)(struct kgsl_device *device, + struct kgsl_pagetable *pagetable); + void (*power_stats)(struct kgsl_device *device, struct kgsl_power_stats *stats); + void (*irqctrl)(struct kgsl_device *device, int state); + /* Optional functions - these functions are not mandatory. The + driver will check that the function pointer is not NULL before + calling the hook */ + void (*setstate) (struct kgsl_device *device, uint32_t flags); + int (*drawctxt_create) (struct kgsl_device *device, + struct kgsl_pagetable *pagetable, struct kgsl_context *context, + uint32_t flags); + void (*drawctxt_destroy) (struct kgsl_device *device, + struct kgsl_context *context); + long (*ioctl) (struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); }; struct kgsl_memregion { @@ -129,6 +110,23 @@ struct kgsl_memregion { unsigned int sizebytes; }; +/* MH register values */ +struct kgsl_mh { + unsigned int mharb; + unsigned int mh_intf_cfg1; + unsigned int mh_intf_cfg2; + uint32_t mpu_base; + int mpu_range; +}; + +struct kgsl_event { + uint32_t timestamp; + void (*func)(struct kgsl_device *, void *, u32); + void *priv; + struct list_head list; +}; + + struct kgsl_device { struct device *dev; const char *name; @@ -140,9 +138,10 @@ struct kgsl_device { struct kgsl_memdesc memstore; const char *iomemname; + struct kgsl_mh mh; struct kgsl_mmu mmu; struct completion hwaccess_gate; - struct kgsl_functable ftbl; + const struct kgsl_functable *ftbl; struct work_struct idle_check_ws; struct timer_list idle_timer; struct kgsl_pwrctrl pwrctrl; @@ -163,6 +162,7 @@ struct kgsl_device { struct completion recovery_gate; struct dentry *d_debugfs; struct idr context_idr; + struct early_suspend display_off; /* Logging levels */ int cmd_log; @@ -173,6 +173,8 @@ struct kgsl_device { struct wake_lock idle_wakelock; struct kgsl_pwrscale pwrscale; struct kobject pwrscale_kobj; + struct work_struct ts_expired_ws; + struct list_head events; }; struct kgsl_context { @@ -215,12 +217,60 @@ struct kgsl_power_stats { struct kgsl_device *kgsl_get_device(int dev_idx); +static inline void kgsl_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) +{ + device->ftbl->regread(device, offsetwords, value); +} + +static inline void kgsl_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + device->ftbl->regwrite(device, offsetwords, value); +} + +static inline int kgsl_idle(struct kgsl_device *device, 
unsigned int timeout) +{ + return device->ftbl->idle(device, timeout); +} + +static inline int kgsl_create_device_sysfs_files(struct device *root, + struct device_attribute **list) +{ + int ret = 0, i; + for (i = 0; list[i] != NULL; i++) + ret |= device_create_file(root, list[i]); + return ret; +} + +static inline void kgsl_remove_device_sysfs_files(struct device *root, + struct device_attribute **list) +{ + int i; + for (i = 0; list[i] != NULL; i++) + device_remove_file(root, list[i]); +} + static inline struct kgsl_mmu * kgsl_get_mmu(struct kgsl_device *device) { return (struct kgsl_mmu *) (device ? &device->mmu : NULL); } +static inline struct kgsl_device *kgsl_device_from_dev(struct device *dev) +{ + int i; + + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->dev == dev) + return kgsl_driver.devp[i]; + } + + return NULL; +} + static inline int kgsl_create_device_workqueue(struct kgsl_device *device) { device->work_queue = create_workqueue(device->name); @@ -244,4 +294,16 @@ kgsl_find_context(struct kgsl_device_private *dev_priv, uint32_t id) return (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL; } +int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp); + +int kgsl_register_ts_notifier(struct kgsl_device *device, + struct notifier_block *nb); + +int kgsl_unregister_ts_notifier(struct kgsl_device *device, + struct notifier_block *nb); + +int kgsl_device_platform_probe(struct kgsl_device *device, + irqreturn_t (*dev_isr) (int, void*)); +void kgsl_device_platform_remove(struct kgsl_device *device); + #endif /* __KGSL_DEVICE_H */ diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c index 1e878e15..cdf9dc4e 100644 --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -293,7 +293,6 @@ kgsl_gem_alloc_memory(struct drm_gem_object *obj) } priv->memdesc.size = obj->size * priv->bufcount; - priv->memdesc.ops = &kgsl_contig_ops; } else if (TYPE_IS_MEM(priv->type)) { priv->memdesc.hostptr = diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h index e816e568..9fafcf4d 100644 --- a/drivers/gpu/msm/kgsl_log.h +++ b/drivers/gpu/msm/kgsl_log.h @@ -1,29 +1,13 @@ /* Copyright (c) 2002,2008-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __KGSL_LOG_H diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c index bc35c411..7916ecb0 100644 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,4 +1,5 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -16,335 +17,76 @@ #include #include #include +#include #include "kgsl.h" #include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" #define KGSL_MMU_ALIGN_SHIFT 13 #define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) -#define GSL_PT_PAGE_BITS_MASK 0x00000007 -#define GSL_PT_PAGE_ADDR_MASK PAGE_MASK +static enum kgsl_mmutype kgsl_mmu_type; -#define GSL_MMU_INT_MASK \ - (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ - MH_INTERRUPT_MASK__AXI_WRITE_ERROR) +static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable); -static ssize_t -sysfs_show_ptpool_entries(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +static int kgsl_cleanup_pt(struct kgsl_pagetable *pt) { - return snprintf(buf, PAGE_SIZE, "%d\n", kgsl_driver.ptpool.entries); -} - -static ssize_t -sysfs_show_ptpool_min(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", - kgsl_driver.ptpool.static_entries); -} - -static ssize_t -sysfs_show_ptpool_chunks(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", kgsl_driver.ptpool.chunks); -} - -static ssize_t -sysfs_show_ptpool_ptsize(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", kgsl_driver.ptpool.ptsize); -} - -static struct kobj_attribute attr_ptpool_entries = { - .attr = { .name = "ptpool_entries", .mode = 0444 }, - .show = sysfs_show_ptpool_entries, - .store = NULL, -}; - -static struct kobj_attribute attr_ptpool_min = { - .attr = { .name = "ptpool_min", .mode = 0444 }, - .show = sysfs_show_ptpool_min, - .store = NULL, -}; - -static struct kobj_attribute attr_ptpool_chunks = { - .attr = { .name = "ptpool_chunks", .mode = 0444 }, - .show = sysfs_show_ptpool_chunks, - .store = NULL, -}; - -static struct kobj_attribute attr_ptpool_ptsize = { - .attr = { .name = "ptpool_ptsize", .mode = 0444 }, - .show = sysfs_show_ptpool_ptsize, - .store = NULL, -}; - -static struct attribute *ptpool_attrs[] = { - &attr_ptpool_entries.attr, - &attr_ptpool_min.attr, - &attr_ptpool_chunks.attr, - &attr_ptpool_ptsize.attr, - NULL, -}; - -static struct attribute_group ptpool_attr_group = { - .attrs = ptpool_attrs, -}; - 
-static int -_kgsl_ptpool_add_entries(struct kgsl_ptpool *pool, int count, int dynamic) -{ - struct kgsl_ptpool_chunk *chunk; - size_t size = ALIGN(count * pool->ptsize, PAGE_SIZE); - - BUG_ON(count == 0); - - if (get_order(size) >= MAX_ORDER) { - KGSL_CORE_ERR("ptpool allocation is too big: %d\n", size); - return -EINVAL; + int i; + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + struct kgsl_device *device = kgsl_driver.devp[i]; + if (device) + device->ftbl->cleanup_pt(device, pt); } - - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); - if (chunk == NULL) { - KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*chunk)); - return -ENOMEM; - } - - chunk->size = size; - chunk->count = count; - chunk->dynamic = dynamic; - - chunk->data = dma_alloc_coherent(NULL, size, - &chunk->phys, GFP_KERNEL); - - if (chunk->data == NULL) { - KGSL_CORE_ERR("dma_alloc_coherent(%d) failed\n", size); - goto err; - } - - chunk->bitmap = kzalloc(BITS_TO_LONGS(count) * 4, GFP_KERNEL); - - if (chunk->bitmap == NULL) { - KGSL_CORE_ERR("kzalloc(%d) failed\n", - BITS_TO_LONGS(count) * 4); - goto err_dma; - } - - list_add_tail(&chunk->list, &pool->list); - - pool->chunks++; - pool->entries += count; - - if (!dynamic) - pool->static_entries += count; - return 0; - -err_dma: - dma_free_coherent(NULL, chunk->size, chunk->data, chunk->phys); -err: - kfree(chunk); - return -ENOMEM; } -static void * -_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr) +static void kgsl_destroy_pagetable(struct kref *kref) { - struct kgsl_ptpool_chunk *chunk; + struct kgsl_pagetable *pagetable = container_of(kref, + struct kgsl_pagetable, refcount); + unsigned long flags; - list_for_each_entry(chunk, &pool->list, list) { - int bit = find_first_zero_bit(chunk->bitmap, chunk->count); + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_del(&pagetable->list); + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); - if (bit >= chunk->count) - continue; + pagetable_remove_sysfs_objects(pagetable); - set_bit(bit, chunk->bitmap); - *physaddr = chunk->phys + (bit * pool->ptsize); + kgsl_cleanup_pt(pagetable); - return chunk->data + (bit * pool->ptsize); - } + if (pagetable->pool) + gen_pool_destroy(pagetable->pool); - return NULL; + pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv); + + kfree(pagetable); } -/** - * kgsl_ptpool_add - * @pool: A pointer to a ptpool structure - * @entries: Number of entries to add - * - * Add static entries to the pagetable pool. - */ - -int -kgsl_ptpool_add(struct kgsl_ptpool *pool, int count) +static inline void kgsl_put_pagetable(struct kgsl_pagetable *pagetable) { - int ret = 0; - BUG_ON(count == 0); - - mutex_lock(&pool->lock); - - /* Only 4MB can be allocated in one chunk, so larger allocations - need to be split into multiple sections */ - - while (count) { - int entries = ((count * pool->ptsize) > SZ_4M) ? - SZ_4M / pool->ptsize : count; - - /* Add the entries as static, i.e. they don't ever stand - a chance of being removed */ - - ret = _kgsl_ptpool_add_entries(pool, entries, 0); - if (ret) - break; - - count -= entries; - } - - mutex_unlock(&pool->lock); - return ret; + if (pagetable) + kref_put(&pagetable->refcount, kgsl_destroy_pagetable); } -/** - * kgsl_ptpool_alloc - * @pool: A pointer to a ptpool structure - * @addr: A pointer to store the physical address of the chunk - * - * Allocate a pagetable from the pool. 
Returns the virtual address - * of the pagetable, the physical address is returned in physaddr - */ - -void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool, unsigned int *physaddr) -{ - void *addr = NULL; - int ret; - - mutex_lock(&pool->lock); - addr = _kgsl_ptpool_get_entry(pool, physaddr); - if (addr) - goto done; - - /* Add a chunk for 1 more pagetable and mark it as dynamic */ - ret = _kgsl_ptpool_add_entries(pool, 1, 1); - - if (ret) - goto done; - - addr = _kgsl_ptpool_get_entry(pool, physaddr); -done: - mutex_unlock(&pool->lock); - return addr; -} - -static inline void _kgsl_ptpool_rm_chunk(struct kgsl_ptpool_chunk *chunk) -{ - list_del(&chunk->list); - - if (chunk->data) - dma_free_coherent(NULL, chunk->size, chunk->data, - chunk->phys); - kfree(chunk->bitmap); - kfree(chunk); -} - -/** - * kgsl_ptpool_free - * @pool: A pointer to a ptpool structure - * @addr: A pointer to the virtual address to free - * - * Free a pagetable allocated from the pool - */ - -void kgsl_ptpool_free(struct kgsl_ptpool *pool, void *addr) -{ - struct kgsl_ptpool_chunk *chunk, *tmp; - - if (pool == NULL || addr == NULL) - return; - - mutex_lock(&pool->lock); - list_for_each_entry_safe(chunk, tmp, &pool->list, list) { - if (addr >= chunk->data && - addr < chunk->data + chunk->size) { - int bit = ((unsigned long) (addr - chunk->data)) / - pool->ptsize; - - clear_bit(bit, chunk->bitmap); - memset(addr, 0, pool->ptsize); - - if (chunk->dynamic && - bitmap_empty(chunk->bitmap, chunk->count)) - _kgsl_ptpool_rm_chunk(chunk); - - break; - } - } - - mutex_unlock(&pool->lock); -} - -void kgsl_ptpool_destroy(struct kgsl_ptpool *pool) -{ - struct kgsl_ptpool_chunk *chunk, *tmp; - - if (pool == NULL) - return; - - mutex_lock(&pool->lock); - list_for_each_entry_safe(chunk, tmp, &pool->list, list) - _kgsl_ptpool_rm_chunk(chunk); - mutex_unlock(&pool->lock); - - memset(pool, 0, sizeof(*pool)); -} - -/** - * kgsl_ptpool_init - * @pool: A pointer to a ptpool structure to initialize - * @ptsize: The size of each pagetable entry - * @entries: The number of inital entries to add to the pool - * - * Initalize a pool and allocate an initial chunk of entries. 
- */ - -int kgsl_ptpool_init(struct kgsl_ptpool *pool, int ptsize, int entries) -{ - int ret = 0; - BUG_ON(ptsize == 0); - - pool->ptsize = ptsize; - mutex_init(&pool->lock); - INIT_LIST_HEAD(&pool->list); - - if (entries) { - ret = kgsl_ptpool_add(pool, entries); - if (ret) - return ret; - } - - return sysfs_create_group(kgsl_driver.ptkobj, &ptpool_attr_group); -} - -/* pt_mutex needs to be held in this function */ - static struct kgsl_pagetable * kgsl_get_pagetable(unsigned long name) { - struct kgsl_pagetable *pt; + struct kgsl_pagetable *pt, *ret = NULL; + unsigned long flags; - list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { - if (pt->name == name) - return pt; + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (pt->name == name) { + ret = pt; + kref_get(&ret->refcount); + break; + } } - return NULL; + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return ret; } static struct kgsl_pagetable * @@ -369,13 +111,12 @@ sysfs_show_entries(struct kobject *kobj, struct kgsl_pagetable *pt; int ret = 0; - mutex_lock(&kgsl_driver.pt_mutex); pt = _get_pt_from_kobj(kobj); if (pt) ret += snprintf(buf, PAGE_SIZE, "%d\n", pt->stats.entries); - mutex_unlock(&kgsl_driver.pt_mutex); + kgsl_put_pagetable(pt); return ret; } @@ -387,13 +128,12 @@ sysfs_show_mapped(struct kobject *kobj, struct kgsl_pagetable *pt; int ret = 0; - mutex_lock(&kgsl_driver.pt_mutex); pt = _get_pt_from_kobj(kobj); if (pt) ret += snprintf(buf, PAGE_SIZE, "%d\n", pt->stats.mapped); - mutex_unlock(&kgsl_driver.pt_mutex); + kgsl_put_pagetable(pt); return ret; } @@ -405,13 +145,13 @@ sysfs_show_va_range(struct kobject *kobj, struct kgsl_pagetable *pt; int ret = 0; - mutex_lock(&kgsl_driver.pt_mutex); pt = _get_pt_from_kobj(kobj); if (pt) - ret += snprintf(buf, PAGE_SIZE, "0x%x\n", pt->va_range); + ret += snprintf(buf, PAGE_SIZE, "0x%x\n", + CONFIG_MSM_KGSL_PAGE_TABLE_SIZE); - mutex_unlock(&kgsl_driver.pt_mutex); + kgsl_put_pagetable(pt); return ret; } @@ -423,13 +163,12 @@ sysfs_show_max_mapped(struct kobject *kobj, struct kgsl_pagetable *pt; int ret = 0; - mutex_lock(&kgsl_driver.pt_mutex); pt = _get_pt_from_kobj(kobj); if (pt) ret += snprintf(buf, PAGE_SIZE, "%d\n", pt->stats.max_mapped); - mutex_unlock(&kgsl_driver.pt_mutex); + kgsl_put_pagetable(pt); return ret; } @@ -441,13 +180,12 @@ sysfs_show_max_entries(struct kobject *kobj, struct kgsl_pagetable *pt; int ret = 0; - mutex_lock(&kgsl_driver.pt_mutex); pt = _get_pt_from_kobj(kobj); if (pt) ret += snprintf(buf, PAGE_SIZE, "%d\n", pt->stats.max_entries); - mutex_unlock(&kgsl_driver.pt_mutex); + kgsl_put_pagetable(pt); return ret; } @@ -529,55 +267,96 @@ err: return ret; } -static inline uint32_t -kgsl_pt_entry_get(struct kgsl_pagetable *pt, uint32_t va) +unsigned int kgsl_mmu_get_current_ptbase(struct kgsl_device *device) { - return (va - pt->va_base) >> PAGE_SHIFT; + struct kgsl_mmu *mmu = &device->mmu; + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return 0; + else + return mmu->mmu_ops->mmu_get_current_ptbase(device); } +EXPORT_SYMBOL(kgsl_mmu_get_current_ptbase); -static inline void -kgsl_pt_map_set(struct kgsl_pagetable *pt, uint32_t pte, uint32_t val) +int +kgsl_mmu_get_ptname_from_ptbase(unsigned int pt_base) { - uint32_t *baseptr = (uint32_t *)pt->base.hostptr; + struct kgsl_pagetable *pt; + int ptid = -1; - writel_relaxed(val, &baseptr[pte]); + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (pt->pt_ops->mmu_pt_equal(pt, pt_base)) { + 
ptid = (int) pt->name; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ptid; } +EXPORT_SYMBOL(kgsl_mmu_get_ptname_from_ptbase); -static inline uint32_t -kgsl_pt_map_getaddr(struct kgsl_pagetable *pt, uint32_t pte) +void kgsl_mmu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) { - uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - uint32_t ret = readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; - return ret; + struct kgsl_mmu *mmu = &device->mmu; + + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return; + else + mmu->mmu_ops->mmu_setstate(device, + pagetable); } +EXPORT_SYMBOL(kgsl_mmu_setstate); + +int kgsl_mmu_init(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + mmu->device = device; + + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type || + KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) { + dev_info(device->dev, "|%s| MMU type set for device is " + "NOMMU\n", __func__); + return 0; + } else if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type) + mmu->mmu_ops = &gpummu_ops; + + return mmu->mmu_ops->mmu_init(device); +} +EXPORT_SYMBOL(kgsl_mmu_init); + +int kgsl_mmu_start(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { + kgsl_regwrite(device, MH_MMU_CONFIG, 0); + return 0; + } else { + return mmu->mmu_ops->mmu_start(device); + } +} +EXPORT_SYMBOL(kgsl_mmu_start); void kgsl_mh_intrcallback(struct kgsl_device *device) { unsigned int status = 0; unsigned int reg; - kgsl_regread_isr(device, device->mmu.reg.interrupt_status, &status); + kgsl_regread(device, MH_INTERRUPT_STATUS, &status); + kgsl_regread(device, MH_AXI_ERROR, ®); - if (status & MH_INTERRUPT_MASK__AXI_READ_ERROR) { - kgsl_regread_isr(device, device->mmu.reg.axi_error, ®); + if (status & MH_INTERRUPT_MASK__AXI_READ_ERROR) KGSL_MEM_CRIT(device, "axi read error interrupt: %08x\n", reg); - } else if (status & MH_INTERRUPT_MASK__AXI_WRITE_ERROR) { - kgsl_regread_isr(device, device->mmu.reg.axi_error, ®); + if (status & MH_INTERRUPT_MASK__AXI_WRITE_ERROR) KGSL_MEM_CRIT(device, "axi write error interrupt: %08x\n", reg); - } else if (status & MH_INTERRUPT_MASK__MMU_PAGE_FAULT) { - kgsl_regread_isr(device, device->mmu.reg.page_fault, ®); - KGSL_MEM_CRIT(device, "mmu page fault interrupt: %08x\n", reg); - } else { - KGSL_MEM_WARN(device, - "bad bits in REG_MH_INTERRUPT_STATUS %08x\n", status); - } + if (status & MH_INTERRUPT_MASK__MMU_PAGE_FAULT) + device->mmu.mmu_ops->mmu_pagefault(device); - kgsl_regwrite_isr(device, device->mmu.reg.interrupt_clear, status); - - /*TODO: figure out how to handle errror interupts. - * specifically, page faults should probably nuke the client that - * caused them, but we don't have enough info to figure that out yet. 
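/*
 * The wrappers added above all follow one pattern: check the global MMU type,
 * short-circuit for KGSL_MMU_TYPE_NONE, otherwise dispatch through the
 * per-type ops table.  A minimal standalone model of that indirection
 * (simplified names, no kernel headers, not part of the patch):
 */
#include <stdio.h>

enum mmutype { MMU_NONE, MMU_GPU };

struct mmu_ops {
	int (*start)(void);
	unsigned int (*get_current_ptbase)(void);
};

static int gpummu_start(void) { return 0; }
static unsigned int gpummu_ptbase(void) { return 0x1000; }

static const struct mmu_ops gpummu_ops = {
	.start = gpummu_start,
	.get_current_ptbase = gpummu_ptbase,
};

struct mmu {
	enum mmutype type;
	const struct mmu_ops *ops;
};

/* generic entry point: no-op when the MMU is disabled, else dispatch */
static unsigned int mmu_get_current_ptbase(struct mmu *mmu)
{
	if (mmu->type == MMU_NONE)
		return 0;
	return mmu->ops->get_current_ptbase();
}

int main(void)
{
	struct mmu m = { .type = MMU_GPU, .ops = &gpummu_ops };

	printf("ptbase %#x\n", mmu_get_current_ptbase(&m));
	return 0;
}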
- */ + status &= KGSL_MMU_INT_MASK; + kgsl_regwrite(device, MH_INTERRUPT_CLEAR, status); } EXPORT_SYMBOL(kgsl_mh_intrcallback); @@ -589,7 +368,7 @@ static int kgsl_setup_pt(struct kgsl_pagetable *pt) for (i = 0; i < KGSL_DEVICE_MAX; i++) { struct kgsl_device *device = kgsl_driver.devp[i]; if (device) { - status = device->ftbl.device_setup_pt(device, pt); + status = device->ftbl->setup_pt(device, pt); if (status) goto error_pt; } @@ -599,28 +378,18 @@ error_pt: while (i >= 0) { struct kgsl_device *device = kgsl_driver.devp[i]; if (device) - device->ftbl.device_cleanup_pt(device, pt); + device->ftbl->cleanup_pt(device, pt); i--; } return status; } -static int kgsl_cleanup_pt(struct kgsl_pagetable *pt) -{ - int i; - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_driver.devp[i]; - if (device) - device->ftbl.device_cleanup_pt(device, pt); - } - return 0; -} - static struct kgsl_pagetable *kgsl_mmu_createpagetableobject( unsigned int name) { int status = 0; struct kgsl_pagetable *pagetable = NULL; + unsigned long flags; pagetable = kzalloc(sizeof(struct kgsl_pagetable), GFP_KERNEL); if (pagetable == NULL) { @@ -629,311 +398,127 @@ static struct kgsl_pagetable *kgsl_mmu_createpagetableobject( return NULL; } - pagetable->refcnt = 1; + kref_init(&pagetable->refcount); spin_lock_init(&pagetable->lock); - pagetable->tlb_flags = 0; pagetable->name = name; - pagetable->va_base = KGSL_PAGETABLE_BASE; - pagetable->va_range = CONFIG_MSM_KGSL_PAGE_TABLE_SIZE; - pagetable->last_superpte = 0; - pagetable->max_entries = KGSL_PAGETABLE_ENTRIES(pagetable->va_range); - - pagetable->tlbflushfilter.size = (pagetable->va_range / - (PAGE_SIZE * GSL_PT_SUPER_PTE * 8)) + 1; - pagetable->tlbflushfilter.base = (unsigned int *) - kzalloc(pagetable->tlbflushfilter.size, GFP_KERNEL); - if (!pagetable->tlbflushfilter.base) { - KGSL_CORE_ERR("kzalloc(%d) failed\n", - pagetable->tlbflushfilter.size); - goto err_alloc; - } - GSL_TLBFLUSH_FILTER_RESET(); + pagetable->max_entries = KGSL_PAGETABLE_ENTRIES( + CONFIG_MSM_KGSL_PAGE_TABLE_SIZE); pagetable->pool = gen_pool_create(PAGE_SHIFT, -1); if (pagetable->pool == NULL) { KGSL_CORE_ERR("gen_pool_create(%d) failed\n", PAGE_SHIFT); - goto err_flushfilter; + goto err_alloc; } - if (gen_pool_add(pagetable->pool, pagetable->va_base, - pagetable->va_range, -1)) { + if (gen_pool_add(pagetable->pool, KGSL_PAGETABLE_BASE, + CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, -1)) { KGSL_CORE_ERR("gen_pool_add failed\n"); goto err_pool; } - pagetable->base.hostptr = kgsl_ptpool_alloc(&kgsl_driver.ptpool, - &pagetable->base.physaddr); + if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type) + pagetable->pt_ops = &gpummu_pt_ops; - if (pagetable->base.hostptr == NULL) + pagetable->priv = pagetable->pt_ops->mmu_create_pagetable(); + if (!pagetable->priv) goto err_pool; - /* ptpool allocations are from coherent memory, so update the - device statistics acordingly */ - - KGSL_STATS_ADD(KGSL_PAGETABLE_SIZE, kgsl_driver.stats.coherent, - kgsl_driver.stats.coherent_max); - - pagetable->base.gpuaddr = pagetable->base.physaddr; - pagetable->base.size = KGSL_PAGETABLE_SIZE; - status = kgsl_setup_pt(pagetable); if (status) - goto err_free_sharedmem; + goto err_mmu_create; + spin_lock_irqsave(&kgsl_driver.ptlock, flags); list_add(&pagetable->list, &kgsl_driver.pagetable_list); + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); /* Create the sysfs entries */ pagetable_add_sysfs_objects(pagetable); return pagetable; -err_free_sharedmem: - kgsl_ptpool_free(&kgsl_driver.ptpool, &pagetable->base.hostptr); 
+err_mmu_create: + pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv); err_pool: gen_pool_destroy(pagetable->pool); -err_flushfilter: - kfree(pagetable->tlbflushfilter.base); err_alloc: kfree(pagetable); return NULL; } -static void kgsl_mmu_destroypagetable(struct kgsl_pagetable *pagetable) -{ - list_del(&pagetable->list); - - pagetable_remove_sysfs_objects(pagetable); - - kgsl_cleanup_pt(pagetable); - - kgsl_ptpool_free(&kgsl_driver.ptpool, pagetable->base.hostptr); - - kgsl_driver.stats.coherent -= KGSL_PAGETABLE_SIZE; - - if (pagetable->pool) { - gen_pool_destroy(pagetable->pool); - pagetable->pool = NULL; - } - - if (pagetable->tlbflushfilter.base) { - pagetable->tlbflushfilter.size = 0; - kfree(pagetable->tlbflushfilter.base); - pagetable->tlbflushfilter.base = NULL; - } - - kfree(pagetable); -} - struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name) { struct kgsl_pagetable *pt; - mutex_lock(&kgsl_driver.pt_mutex); + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return (void *)(-1); +#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE +#else + name = KGSL_MMU_GLOBAL_PT; +#endif pt = kgsl_get_pagetable(name); - if (pt) { - spin_lock(&pt->lock); - pt->refcnt++; - spin_unlock(&pt->lock); - goto done; - } + if (pt == NULL) + pt = kgsl_mmu_createpagetableobject(name); - pt = kgsl_mmu_createpagetableobject(name); - -done: - mutex_unlock(&kgsl_driver.pt_mutex); return pt; } void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) { - bool dead; - if (pagetable == NULL) + kgsl_put_pagetable(pagetable); +} +EXPORT_SYMBOL(kgsl_mmu_putpagetable); + +void kgsl_setstate(struct kgsl_device *device, uint32_t flags) +{ + struct kgsl_mmu *mmu = &device->mmu; + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) return; - - mutex_lock(&kgsl_driver.pt_mutex); - - spin_lock(&pagetable->lock); - dead = (--pagetable->refcnt) == 0; - spin_unlock(&pagetable->lock); - - if (dead) - kgsl_mmu_destroypagetable(pagetable); - - mutex_unlock(&kgsl_driver.pt_mutex); + else if (device->ftbl->setstate) + device->ftbl->setstate(device, flags); + else if (mmu->mmu_ops->mmu_device_setstate) + mmu->mmu_ops->mmu_device_setstate(device, flags); } +EXPORT_SYMBOL(kgsl_setstate); -int kgsl_mmu_setstate(struct kgsl_device *device, - struct kgsl_pagetable *pagetable) +void kgsl_mmu_device_setstate(struct kgsl_device *device, uint32_t flags) { - int status = 0; struct kgsl_mmu *mmu = &device->mmu; - - if (mmu->flags & KGSL_FLAGS_STARTED) { - /* page table not current, then setup mmu to use new - * specified page table - */ - if (mmu->hwpagetable != pagetable) { - mmu->hwpagetable = pagetable; - spin_lock(&mmu->hwpagetable->lock); - mmu->hwpagetable->tlb_flags &= ~(1<id); - spin_unlock(&mmu->hwpagetable->lock); - - /* call device specific set page table */ - status = kgsl_setstate(mmu->device, - KGSL_MMUFLAGS_TLBFLUSH | - KGSL_MMUFLAGS_PTUPDATE); - - } - } - - return status; + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return; + else if (mmu->mmu_ops->mmu_device_setstate) + mmu->mmu_ops->mmu_device_setstate(device, flags); } -EXPORT_SYMBOL(kgsl_mmu_setstate); +EXPORT_SYMBOL(kgsl_mmu_device_setstate); -int kgsl_mmu_init(struct kgsl_device *device) +void kgsl_mh_start(struct kgsl_device *device) { - /* - * intialize device mmu - * - * call this with the global lock held - */ - int status = 0; - struct kgsl_mmu *mmu = &device->mmu; - - mmu->device = device; - - /* make sure aligned to pagesize */ - BUG_ON(mmu->mpu_base & (PAGE_SIZE - 1)); - BUG_ON((mmu->mpu_base + mmu->mpu_range) & (PAGE_SIZE - 1)); - - /* sub-client MMU lookups 
require address translation */ - if ((mmu->config & ~0x1) > 0) { - /*make sure virtual address range is a multiple of 64Kb */ - BUG_ON(CONFIG_MSM_KGSL_PAGE_TABLE_SIZE & ((1 << 16) - 1)); - - /* allocate memory used for completing r/w operations that - * cannot be mapped by the MMU - */ - status = kgsl_allocate_contig(&mmu->dummyspace, 64); - if (!status) - kgsl_sharedmem_set(&mmu->dummyspace, 0, 0, - mmu->dummyspace.size); - } - - return status; -} - -int kgsl_mmu_start(struct kgsl_device *device) -{ - /* - * intialize device mmu - * - * call this with the global lock held - */ - int status; - struct kgsl_mmu *mmu = &device->mmu; - - if (mmu->flags & KGSL_FLAGS_STARTED) - return 0; - - /* MMU not enabled */ - if ((mmu->config & 0x1) == 0) - return 0; - - mmu->flags |= KGSL_FLAGS_STARTED; - - /* setup MMU and sub-client behavior */ - kgsl_regwrite(device, device->mmu.reg.config, mmu->config); - - /* enable axi interrupts */ - kgsl_regwrite(device, device->mmu.reg.interrupt_mask, - GSL_MMU_INT_MASK); - - /* idle device */ + struct kgsl_mh *mh = &device->mh; + /* force mmu off to for now*/ + kgsl_regwrite(device, MH_MMU_CONFIG, 0); kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); /* define physical memory range accessible by the core */ - kgsl_regwrite(device, device->mmu.reg.mpu_base, mmu->mpu_base); - kgsl_regwrite(device, device->mmu.reg.mpu_end, - mmu->mpu_base + mmu->mpu_range); + kgsl_regwrite(device, MH_MMU_MPU_BASE, mh->mpu_base); + kgsl_regwrite(device, MH_MMU_MPU_END, + mh->mpu_base + mh->mpu_range); + kgsl_regwrite(device, MH_ARBITER_CONFIG, mh->mharb); - /* enable axi interrupts */ - kgsl_regwrite(device, device->mmu.reg.interrupt_mask, - GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT); + if (mh->mh_intf_cfg1 != 0) + kgsl_regwrite(device, MH_CLNT_INTF_CTRL_CONFIG1, + mh->mh_intf_cfg1); - /* sub-client MMU lookups require address translation */ - if ((mmu->config & ~0x1) > 0) { + if (mh->mh_intf_cfg2 != 0) + kgsl_regwrite(device, MH_CLNT_INTF_CTRL_CONFIG2, + mh->mh_intf_cfg2); - kgsl_sharedmem_set(&mmu->dummyspace, 0, 0, - mmu->dummyspace.size); - - /* TRAN_ERROR needs a 32 byte (32 byte aligned) chunk of memory - * to complete transactions in case of an MMU fault. Note that - * we'll leave the bottom 32 bytes of the dummyspace for other - * purposes (e.g. 
use it when dummy read cycles are needed - * for other blocks */ - kgsl_regwrite(device, device->mmu.reg.tran_error, - mmu->dummyspace.physaddr + 32); - - if (mmu->defaultpagetable == NULL) - mmu->defaultpagetable = - kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); - mmu->hwpagetable = mmu->defaultpagetable; - - kgsl_regwrite(device, device->mmu.reg.pt_page, - mmu->hwpagetable->base.gpuaddr); - kgsl_regwrite(device, device->mmu.reg.va_range, - (mmu->hwpagetable->va_base | - (mmu->hwpagetable->va_range >> 16))); - status = kgsl_setstate(device, KGSL_MMUFLAGS_TLBFLUSH); - if (status) { - KGSL_MEM_ERR(device, "Failed to setstate TLBFLUSH\n"); - goto error; - } - } - - return 0; -error: - /* disable MMU */ - kgsl_regwrite(device, device->mmu.reg.interrupt_mask, 0); - kgsl_regwrite(device, device->mmu.reg.config, 0x00000000); - return status; -} -EXPORT_SYMBOL(kgsl_mmu_start); - -unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr) -{ - unsigned int physaddr = 0; - pgd_t *pgd_ptr = NULL; - pmd_t *pmd_ptr = NULL; - pte_t *pte_ptr = NULL, pte; - - pgd_ptr = pgd_offset(current->mm, (unsigned long) virtaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) { - KGSL_CORE_ERR("Invalid pgd entry\n"); - return 0; - } - - pmd_ptr = pmd_offset(pgd_ptr, (unsigned long) virtaddr); - if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { - KGSL_CORE_ERR("Invalid pmd entry\n"); - return 0; - } - - pte_ptr = pte_offset_map(pmd_ptr, (unsigned long) virtaddr); - if (!pte_ptr) { - KGSL_CORE_ERR("pt_offset_map failed\n"); - return 0; - } - pte = *pte_ptr; - physaddr = pte_pfn(pte); - pte_unmap(pte_ptr); - physaddr <<= PAGE_SHIFT; - return physaddr; + /* + * Interrupts are enabled on a per-device level when + * kgsl_pwrctrl_irq() is called + */ } int @@ -941,14 +526,12 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc, unsigned int protflags) { - int numpages; - unsigned int pte, ptefirst, ptelast, physaddr; - int flushtlb; - unsigned int offset = 0; - - BUG_ON(protflags & ~(GSL_PT_PAGE_RV | GSL_PT_PAGE_WV)); - BUG_ON(protflags == 0); + int ret; + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { + memdesc->gpuaddr = memdesc->physaddr; + return 0; + } memdesc->gpuaddr = gen_pool_alloc_aligned(pagetable->pool, memdesc->size, KGSL_MMU_ALIGN_SHIFT); @@ -960,36 +543,11 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, return -ENOMEM; } - numpages = (memdesc->size >> PAGE_SHIFT); - - ptefirst = kgsl_pt_entry_get(pagetable, memdesc->gpuaddr); - ptelast = ptefirst + numpages; - - pte = ptefirst; - flushtlb = 0; - - /* tlb needs to be flushed when the first and last pte are not at - * superpte boundaries */ - if ((ptefirst & (GSL_PT_SUPER_PTE - 1)) != 0 || - ((ptelast + 1) & (GSL_PT_SUPER_PTE-1)) != 0) - flushtlb = 1; - spin_lock(&pagetable->lock); - for (pte = ptefirst; pte < ptelast; pte++, offset += PAGE_SIZE) { -#ifdef VERBOSE_DEBUG - /* check if PTE exists */ - uint32_t val = kgsl_pt_map_getaddr(pagetable, pte); - BUG_ON(val != 0 && val != GSL_PT_PAGE_DIRTY); -#endif - if ((pte & (GSL_PT_SUPER_PTE-1)) == 0) - if (GSL_TLBFLUSH_FILTER_ISDIRTY(pte / GSL_PT_SUPER_PTE)) - flushtlb = 1; - /* mark pte as in use */ + ret = pagetable->pt_ops->mmu_map(pagetable->priv, memdesc, protflags); - physaddr = memdesc->ops->physaddr(memdesc, offset); - BUG_ON(physaddr == 0); - kgsl_pt_map_set(pagetable, pte, physaddr | protflags); - } + if (ret) + goto err_free_gpuaddr; /* Keep track of the statistics for the sysfs files */ @@ -999,70 +557,40 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, KGSL_STATS_ADD(memdesc->size, 
pagetable->stats.mapped, pagetable->stats.max_mapped); - /* Post all writes to the pagetable */ - wmb(); - - /* Invalidate tlb only if current page table used by GPU is the - * pagetable that we used to allocate */ - if (flushtlb) { - /*set all devices as needing flushing*/ - pagetable->tlb_flags = UINT_MAX; - GSL_TLBFLUSH_FILTER_RESET(); - } spin_unlock(&pagetable->lock); return 0; + +err_free_gpuaddr: + spin_unlock(&pagetable->lock); + gen_pool_free(pagetable->pool, memdesc->gpuaddr, memdesc->size); + memdesc->gpuaddr = 0; + return ret; } +EXPORT_SYMBOL(kgsl_mmu_map); int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc) { - unsigned int numpages; - unsigned int pte, ptefirst, ptelast, superpte; - unsigned int range = memdesc->size; - - /* All GPU addresses as assigned are page aligned, but some - functions purturb the gpuaddr with an offset, so apply the - mask here to make sure we have the right address */ - - unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; - - if (range == 0 || gpuaddr == 0) + if (memdesc->size == 0 || memdesc->gpuaddr == 0) return 0; - numpages = (range >> PAGE_SHIFT); - if (range & (PAGE_SIZE - 1)) - numpages++; - - ptefirst = kgsl_pt_entry_get(pagetable, gpuaddr); - ptelast = ptefirst + numpages; - - spin_lock(&pagetable->lock); - superpte = ptefirst - (ptefirst & (GSL_PT_SUPER_PTE-1)); - GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / GSL_PT_SUPER_PTE); - for (pte = ptefirst; pte < ptelast; pte++) { -#ifdef VERBOSE_DEBUG - /* check if PTE exists */ - BUG_ON(!kgsl_pt_map_getaddr(pagetable, pte)); -#endif - kgsl_pt_map_set(pagetable, pte, GSL_PT_PAGE_DIRTY); - superpte = pte - (pte & (GSL_PT_SUPER_PTE - 1)); - if (pte == superpte) - GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / - GSL_PT_SUPER_PTE); + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { + memdesc->gpuaddr = 0; + return 0; } - + spin_lock(&pagetable->lock); + pagetable->pt_ops->mmu_unmap(pagetable->priv, memdesc); /* Remove the statistics */ pagetable->stats.entries--; - pagetable->stats.mapped -= range; - - /* Post all writes to the pagetable */ - wmb(); + pagetable->stats.mapped -= memdesc->size; spin_unlock(&pagetable->lock); - gen_pool_free(pagetable->pool, gpuaddr, range); + gen_pool_free(pagetable->pool, + memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, + memdesc->size); return 0; } @@ -1078,6 +606,9 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, KGSL_CORE_ERR("invalid memdesc\n"); goto error; } + /* Not all global mappings are needed for all MMU types */ + if (!memdesc->size) + return 0; gpuaddr = memdesc->gpuaddr; @@ -1102,40 +633,88 @@ EXPORT_SYMBOL(kgsl_mmu_map_global); int kgsl_mmu_stop(struct kgsl_device *device) { - /* - * stop device mmu - * - * call this with the global lock held - */ struct kgsl_mmu *mmu = &device->mmu; - if (mmu->flags & KGSL_FLAGS_STARTED) { - /* disable mh interrupts */ - /* disable MMU */ - kgsl_regwrite(device, device->mmu.reg.interrupt_mask, 0); - kgsl_regwrite(device, device->mmu.reg.config, 0x00000000); - - mmu->flags &= ~KGSL_FLAGS_STARTED; - } - - return 0; + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return 0; + else + return mmu->mmu_ops->mmu_stop(device); } EXPORT_SYMBOL(kgsl_mmu_stop); int kgsl_mmu_close(struct kgsl_device *device) { - /* - * close device mmu - * - * call this with the global lock held - */ struct kgsl_mmu *mmu = &device->mmu; - if (mmu->dummyspace.gpuaddr) - kgsl_sharedmem_free(&mmu->dummyspace); - - if (mmu->defaultpagetable) - kgsl_mmu_putpagetable(mmu->defaultpagetable); - - return 0; + if (kgsl_mmu_type == 
KGSL_MMU_TYPE_NONE) + return 0; + else + return mmu->mmu_ops->mmu_close(device); } +EXPORT_SYMBOL(kgsl_mmu_close); + +int kgsl_mmu_pt_get_flags(struct kgsl_pagetable *pt, + enum kgsl_deviceid id) +{ + if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type) + return pt->pt_ops->mmu_pt_get_flags(pt, id); + else + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_pt_get_flags); + +void kgsl_mmu_ptpool_destroy(void *ptpool) +{ + if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type) + kgsl_gpummu_ptpool_destroy(ptpool); + ptpool = 0; +} +EXPORT_SYMBOL(kgsl_mmu_ptpool_destroy); + +void *kgsl_mmu_ptpool_init(int ptsize, int entries) +{ + if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type) + return kgsl_gpummu_ptpool_init(ptsize, entries); + else + return (void *)(-1); +} +EXPORT_SYMBOL(kgsl_mmu_ptpool_init); + +int kgsl_mmu_enabled(void) +{ + if (KGSL_MMU_TYPE_NONE != kgsl_mmu_type) + return 1; + else + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_enabled); + +int kgsl_mmu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return true; + else + return pt->pt_ops->mmu_pt_equal(pt, pt_base); +} +EXPORT_SYMBOL(kgsl_mmu_pt_equal); + +enum kgsl_mmutype kgsl_mmu_get_mmutype(void) +{ + return kgsl_mmu_type; +} +EXPORT_SYMBOL(kgsl_mmu_get_mmutype); + +void kgsl_mmu_set_mmutype(char *mmutype) +{ + kgsl_mmu_type = KGSL_MMU_TYPE_NONE; +#ifdef CONFIG_MSM_KGSL_GPUMMU + kgsl_mmu_type = KGSL_MMU_TYPE_GPU; +#elif defined(CONFIG_MSM_KGSL_IOMMU) +#endif + if (mmutype && !strncmp(mmutype, "gpummu", 6)) + kgsl_mmu_type = KGSL_MMU_TYPE_GPU; + if (mmutype && !strncmp(mmutype, "nommu", 5)) + kgsl_mmu_type = KGSL_MMU_TYPE_NONE; +} +EXPORT_SYMBOL(kgsl_mmu_set_mmutype); diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h index 4a67ea67..4af073a7 100644 --- a/drivers/gpu/msm/kgsl_mmu.h +++ b/drivers/gpu/msm/kgsl_mmu.h @@ -1,34 +1,20 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. 
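/*
 * kgsl_mmu_set_mmutype() above picks a compile-time default and then lets a
 * caller-supplied string ("gpummu"/"nommu") override it.  The same selection
 * logic in standalone form; DEFAULT_MMU_IS_GPU is a stand-in for the
 * CONFIG_MSM_KGSL_GPUMMU Kconfig option and is not part of the patch.
 */
#include <string.h>

enum mmutype { MMU_GPU, MMU_IOMMU, MMU_NONE };

#define DEFAULT_MMU_IS_GPU 1   /* stand-in for the Kconfig default */

enum mmutype pick_mmutype(const char *arg)
{
	enum mmutype t = MMU_NONE;

	if (DEFAULT_MMU_IS_GPU)
		t = MMU_GPU;

	/* an explicit request wins over the compiled-in default */
	if (arg && !strncmp(arg, "gpummu", 6))
		t = MMU_GPU;
	if (arg && !strncmp(arg, "nommu", 5))
		t = MMU_NONE;

	return t;
}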
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __KGSL_MMU_H #define __KGSL_MMU_H -#include "kgsl_sharedmem.h" + +#define KGSL_MMU_ALIGN_SHIFT 13 +#define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) /* Identifier for the global page table */ /* Per process page tables will probably pass in the thread group @@ -36,10 +22,52 @@ #define KGSL_MMU_GLOBAL_PT 0 +struct kgsl_device; + #define GSL_PT_SUPER_PTE 8 #define GSL_PT_PAGE_WV 0x00000001 #define GSL_PT_PAGE_RV 0x00000002 #define GSL_PT_PAGE_DIRTY 0x00000004 + +/* MMU registers - the register locations for all cores are the + same. The method for getting to those locations differs between + 2D and 3D, but the 2D and 3D register functions do that magic + for us */ + +#define MH_MMU_CONFIG 0x0040 +#define MH_MMU_VA_RANGE 0x0041 +#define MH_MMU_PT_BASE 0x0042 +#define MH_MMU_PAGE_FAULT 0x0043 +#define MH_MMU_TRAN_ERROR 0x0044 +#define MH_MMU_INVALIDATE 0x0045 +#define MH_MMU_MPU_BASE 0x0046 +#define MH_MMU_MPU_END 0x0047 + +#define MH_INTERRUPT_MASK 0x0A42 +#define MH_INTERRUPT_STATUS 0x0A43 +#define MH_INTERRUPT_CLEAR 0x0A44 +#define MH_AXI_ERROR 0x0A45 +#define MH_ARBITER_CONFIG 0x0A40 +#define MH_DEBUG_CTRL 0x0A4E +#define MH_DEBUG_DATA 0x0A4F +#define MH_AXI_HALT_CONTROL 0x0A50 +#define MH_CLNT_INTF_CTRL_CONFIG1 0x0A54 +#define MH_CLNT_INTF_CTRL_CONFIG2 0x0A55 + +/* MH_MMU_CONFIG bit definitions */ + +#define MH_MMU_CONFIG__RB_W_CLNT_BEHAVIOR__SHIFT 0x00000004 +#define MH_MMU_CONFIG__CP_W_CLNT_BEHAVIOR__SHIFT 0x00000006 +#define MH_MMU_CONFIG__CP_R0_CLNT_BEHAVIOR__SHIFT 0x00000008 +#define MH_MMU_CONFIG__CP_R1_CLNT_BEHAVIOR__SHIFT 0x0000000a +#define MH_MMU_CONFIG__CP_R2_CLNT_BEHAVIOR__SHIFT 0x0000000c +#define MH_MMU_CONFIG__CP_R3_CLNT_BEHAVIOR__SHIFT 0x0000000e +#define MH_MMU_CONFIG__CP_R4_CLNT_BEHAVIOR__SHIFT 0x00000010 +#define MH_MMU_CONFIG__VGT_R0_CLNT_BEHAVIOR__SHIFT 0x00000012 +#define MH_MMU_CONFIG__VGT_R1_CLNT_BEHAVIOR__SHIFT 0x00000014 +#define MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT 0x00000016 +#define MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT 0x00000018 + /* MMU Flags */ #define KGSL_MMUFLAGS_TLBFLUSH 0x10000000 #define KGSL_MMUFLAGS_PTUPDATE 0x20000000 @@ -48,43 +76,30 @@ #define MH_INTERRUPT_MASK__AXI_WRITE_ERROR 0x00000002L #define MH_INTERRUPT_MASK__MMU_PAGE_FAULT 0x00000004L -/* Macros to manage TLB flushing */ -#define GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS (sizeof(unsigned char) * 8) -#define GSL_TLBFLUSH_FILTER_GET(superpte) \ - (*((unsigned char *) \ - (((unsigned int)pagetable->tlbflushfilter.base) \ - + (superpte / GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)))) -#define GSL_TLBFLUSH_FILTER_SETDIRTY(superpte) \ - (GSL_TLBFLUSH_FILTER_GET((superpte)) |= 1 << \ - (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)) -#define GSL_TLBFLUSH_FILTER_ISDIRTY(superpte) \ - 
(GSL_TLBFLUSH_FILTER_GET((superpte)) & \ - (1 << (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS))) -#define GSL_TLBFLUSH_FILTER_RESET() memset(pagetable->tlbflushfilter.base,\ - 0, pagetable->tlbflushfilter.size) +#ifdef CONFIG_MSM_KGSL_MMU +#define KGSL_MMU_INT_MASK \ + (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ + MH_INTERRUPT_MASK__AXI_WRITE_ERROR | \ + MH_INTERRUPT_MASK__MMU_PAGE_FAULT) +#else +#define KGSL_MMU_INT_MASK \ + (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ + MH_INTERRUPT_MASK__AXI_WRITE_ERROR) +#endif - -struct kgsl_device; - -struct kgsl_tlbflushfilter { - unsigned int *base; - unsigned int size; +enum kgsl_mmutype { + KGSL_MMU_TYPE_GPU = 0, + KGSL_MMU_TYPE_IOMMU, + KGSL_MMU_TYPE_NONE }; struct kgsl_pagetable { spinlock_t lock; - unsigned int refcnt; - struct kgsl_memdesc base; - uint32_t va_base; - unsigned int va_range; - unsigned int last_superpte; + struct kref refcount; unsigned int max_entries; struct gen_pool *pool; struct list_head list; unsigned int name; - /* Maintain filter to manage tlb flushing */ - struct kgsl_tlbflushfilter tlbflushfilter; - unsigned int tlb_flags; struct kobject *kobj; struct { @@ -93,22 +108,36 @@ struct kgsl_pagetable { unsigned int max_mapped; unsigned int max_entries; } stats; + const struct kgsl_mmu_pt_ops *pt_ops; + void *priv; }; -struct kgsl_mmu_reg { +struct kgsl_mmu_ops { + int (*mmu_init) (struct kgsl_device *device); + int (*mmu_close) (struct kgsl_device *device); + int (*mmu_start) (struct kgsl_device *device); + int (*mmu_stop) (struct kgsl_device *device); + void (*mmu_setstate) (struct kgsl_device *device, + struct kgsl_pagetable *pagetable); + void (*mmu_device_setstate) (struct kgsl_device *device, + uint32_t flags); + void (*mmu_pagefault) (struct kgsl_device *device); + unsigned int (*mmu_get_current_ptbase) + (struct kgsl_device *device); +}; - uint32_t config; - uint32_t mpu_base; - uint32_t mpu_end; - uint32_t va_range; - uint32_t pt_page; - uint32_t page_fault; - uint32_t tran_error; - uint32_t invalidate; - uint32_t interrupt_mask; - uint32_t interrupt_status; - uint32_t interrupt_clear; - uint32_t axi_error; +struct kgsl_mmu_pt_ops { + int (*mmu_map) (void *mmu_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags); + int (*mmu_unmap) (void *mmu_pt, + struct kgsl_memdesc *memdesc); + void *(*mmu_create_pagetable) (void); + void (*mmu_destroy_pagetable) (void *pt); + int (*mmu_pt_equal) (struct kgsl_pagetable *pt, + unsigned int pt_base); + unsigned int (*mmu_pt_get_flags) (struct kgsl_pagetable *pt, + enum kgsl_deviceid id); }; struct kgsl_mmu { @@ -116,46 +145,27 @@ struct kgsl_mmu { uint32_t flags; struct kgsl_device *device; unsigned int config; - uint32_t mpu_base; - int mpu_range; - struct kgsl_memdesc dummyspace; - struct kgsl_mmu_reg reg; + struct kgsl_memdesc setstate_memory; /* current page table object being used by device mmu */ struct kgsl_pagetable *defaultpagetable; struct kgsl_pagetable *hwpagetable; + const struct kgsl_mmu_ops *mmu_ops; + void *priv; }; -struct kgsl_ptpool_chunk { - size_t size; - unsigned int count; - int dynamic; +#include "kgsl_gpummu.h" - void *data; - unsigned int phys; - - unsigned long *bitmap; - struct list_head list; -}; - -struct kgsl_ptpool { - size_t ptsize; - struct mutex lock; - struct list_head list; - int entries; - int static_entries; - int chunks; -}; +extern struct kgsl_mmu_ops iommu_ops; +extern struct kgsl_mmu_pt_ops iommu_pt_ops; struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name); - -#ifdef CONFIG_MSM_KGSL_MMU - +void kgsl_mmu_putpagetable(struct 
kgsl_pagetable *pagetable); +void kgsl_mh_start(struct kgsl_device *device); +void kgsl_mh_intrcallback(struct kgsl_device *device); int kgsl_mmu_init(struct kgsl_device *device); int kgsl_mmu_start(struct kgsl_device *device); int kgsl_mmu_stop(struct kgsl_device *device); int kgsl_mmu_close(struct kgsl_device *device); -int kgsl_mmu_setstate(struct kgsl_device *device, - struct kgsl_pagetable *pagetable); int kgsl_mmu_map(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc, unsigned int protflags); @@ -163,105 +173,21 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc, unsigned int protflags); int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc); -void kgsl_ptpool_destroy(struct kgsl_ptpool *pool); -int kgsl_ptpool_init(struct kgsl_ptpool *pool, int ptsize, int entries); -void kgsl_mh_intrcallback(struct kgsl_device *device); -void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable); unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr); +void kgsl_setstate(struct kgsl_device *device, uint32_t flags); +void kgsl_mmu_device_setstate(struct kgsl_device *device, uint32_t flags); +void kgsl_mmu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pt); +int kgsl_mmu_get_ptname_from_ptbase(unsigned int pt_base); +int kgsl_mmu_pt_get_flags(struct kgsl_pagetable *pt, + enum kgsl_deviceid id); -static inline int kgsl_mmu_enabled(void) -{ - return 1; -} - -#else - -static inline int kgsl_mmu_enabled(void) -{ - return 0; -} - -static inline int kgsl_mmu_init(struct kgsl_device *device) -{ - return 0; -} - -static inline int kgsl_mmu_start(struct kgsl_device *device) -{ - return 0; -} - -static inline int kgsl_mmu_stop(struct kgsl_device *device) -{ - return 0; -} - -static inline int kgsl_mmu_close(struct kgsl_device *device) -{ - return 0; -} - -static inline int kgsl_mmu_setstate(struct kgsl_device *device, - struct kgsl_pagetable *pagetable) -{ - return 0; -} - -static inline int kgsl_mmu_map(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, - unsigned int protflags) -{ - memdesc->gpuaddr = memdesc->physaddr; - return 0; -} - -static inline int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc) -{ - return 0; -} - -static inline int kgsl_ptpool_init(struct kgsl_ptpool *pool, int ptsize, - int entries) -{ - return 0; -} - -static inline int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, unsigned int protflags) -{ - /* gpuaddr is the same that gets passed in */ - return 0; -} - -static inline void kgsl_ptpool_destroy(struct kgsl_ptpool *pool) { } - -static inline void kgsl_mh_intrcallback(struct kgsl_device *device) { } - -static inline void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) { } - -static inline unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr) -{ - return 0; -} - -#endif - -static inline unsigned int kgsl_pt_get_flags(struct kgsl_pagetable *pt, - enum kgsl_deviceid id) -{ - unsigned int result = 0; - - if (pt == NULL) - return 0; - - spin_lock(&pt->lock); - if (pt->tlb_flags && (1<tlb_flags &= ~(1<lock); - return result; -} - +void kgsl_mmu_ptpool_destroy(void *ptpool); +void *kgsl_mmu_ptpool_init(int ptsize, int entries); +int kgsl_mmu_enabled(void); +int kgsl_mmu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base); +void kgsl_mmu_set_mmutype(char *mmutype); +unsigned int kgsl_mmu_get_current_ptbase(struct kgsl_device *device); +enum kgsl_mmutype kgsl_mmu_get_mmutype(void); #endif /* 
__KGSL_MMU_H */ diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index dcac6aba..730a1e17 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -1,4 +1,5 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,53 +12,22 @@ * */ #include +#include #include -#include #include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define KGSL_PWRFLAGS_POWER_ON 0 +#define KGSL_PWRFLAGS_CLK_ON 1 +#define KGSL_PWRFLAGS_AXI_ON 2 +#define KGSL_PWRFLAGS_IRQ_ON 3 #define SWITCH_OFF 200 -#define TZ_UPDATE_ID 0x01404000 -#define TZ_RESET_ID 0x01403000 - -#ifdef CONFIG_MSM_SECURE_IO -/* Trap into the TrustZone, and call funcs there. */ -static int __secure_tz_entry(u32 cmd, u32 val) -{ - register u32 r0 asm("r0") = cmd; - register u32 r1 asm("r1") = 0x0; - register u32 r2 asm("r2") = val; - - __iowmb(); - asm( - __asmeq("%0", "r0") - __asmeq("%1", "r0") - __asmeq("%2", "r1") - __asmeq("%3", "r2") - "smc #0 @ switch to secure world\n" - : "=r" (r0) - : "r" (r0), "r" (r1), "r" (r2) - ); - return r0; -} -#else -static int __secure_tz_entry(u32 cmd, u32 val) -{ - return 0; -} -#endif /* CONFIG_MSM_SECURE_IO */ - -/* Returns the requested update to our power level. * - * Either up/down (-1/1) a level, or stay the same (0). */ -static inline int kgsl_pwrctrl_tz_update(u32 idle) -{ - return __secure_tz_entry(TZ_UPDATE_ID, idle); -} - -static inline void kgsl_pwrctrl_tz_reset(void) -{ - __secure_tz_entry(TZ_RESET_ID, 0); -} +#define GPU_SWFI_LATENCY 3 +#define UPDATE_BUSY_VAL 1000000 +#define UPDATE_BUSY 50 void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, unsigned int new_level) @@ -67,16 +37,18 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, new_level >= pwr->thermal_pwrlevel && new_level != pwr->active_pwrlevel) { pwr->active_pwrlevel = new_level; - if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags)) + if ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags)) || + (device->state == KGSL_STATE_NAP)) clk_set_rate(pwr->grp_clks[0], pwr->pwrlevels[pwr->active_pwrlevel]. gpu_freq); - if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) - if (pwr->pcl) - msm_bus_scale_client_update_request(pwr->pcl, + if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) { + if (pwr->ebi1_clk) + clk_set_rate(pwr->ebi1_clk, pwr->pwrlevels[pwr->active_pwrlevel]. 
bus_freq); - KGSL_PWR_WARN(device, "pwr level changed to %d\n", + } + KGSL_PWR_WARN(device, "kgsl pwr level changed to %d\n", pwr->active_pwrlevel); } } @@ -257,41 +229,20 @@ static int kgsl_pwrctrl_idle_timer_show(struct device *dev, device->pwrctrl.interval_timeout); } -static int kgsl_pwrctrl_scaling_governor_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - char temp[20]; - struct kgsl_device *device = kgsl_device_from_dev(dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - unsigned int reset = pwr->idle_pass; - - snprintf(temp, sizeof(temp), "%.*s", - (int)min(count, sizeof(temp) - 1), buf); - if (strncmp(temp, "ondemand", 8) == 0) - reset = 1; - else if (strncmp(temp, "performance", 11) == 0) - reset = 0; - - mutex_lock(&device->mutex); - pwr->idle_pass = reset; - if (pwr->idle_pass == 0) - kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel); - mutex_unlock(&device->mutex); - - return count; -} - -static int kgsl_pwrctrl_scaling_governor_show(struct device *dev, +static int kgsl_pwrctrl_gpubusy_show(struct device *dev, struct device_attribute *attr, char *buf) { + int ret; struct kgsl_device *device = kgsl_device_from_dev(dev); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - if (pwr->idle_pass) - return snprintf(buf, 10, "ondemand\n"); - else - return snprintf(buf, 13, "performance\n"); + struct kgsl_busy *b = &device->pwrctrl.busy; + ret = snprintf(buf, 17, "%7d %7d\n", + b->on_time_old, b->time_old); + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + b->on_time_old = 0; + b->time_old = 0; + } + return ret; } DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, kgsl_pwrctrl_gpuclk_store); @@ -300,15 +251,15 @@ DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show, DEVICE_ATTR(pwrnap, 0644, kgsl_pwrctrl_pwrnap_show, kgsl_pwrctrl_pwrnap_store); DEVICE_ATTR(idle_timer, 0644, kgsl_pwrctrl_idle_timer_show, kgsl_pwrctrl_idle_timer_store); -DEVICE_ATTR(scaling_governor, 0644, kgsl_pwrctrl_scaling_governor_show, - kgsl_pwrctrl_scaling_governor_store); +DEVICE_ATTR(gpubusy, 0644, kgsl_pwrctrl_gpubusy_show, + NULL); -static const struct device_attribute *pwrctrl_attr_list[] = { +static struct device_attribute *pwrctrl_attr_list[] = { &dev_attr_gpuclk, &dev_attr_max_gpuclk, &dev_attr_pwrnap, &dev_attr_idle_timer, - &dev_attr_scaling_governor, + &dev_attr_gpubusy, NULL }; @@ -322,28 +273,29 @@ void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device) kgsl_remove_device_sysfs_files(device->dev, pwrctrl_attr_list); } -static void kgsl_pwrctrl_idle_calc(struct kgsl_device *device) +/* Track the amount of time the gpu is on vs the total system time. * + * Regularly update the percentage of busy time displayed by sysfs. */ +static void kgsl_pwrctrl_busy_time(struct kgsl_device *device, bool on_time) { - int val; - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - struct kgsl_power_stats stats; - - device->ftbl.device_power_stats(device, &stats); - - if (stats.total_time == 0) - return; - - /* If the GPU has stayed in turbo mode for a while, * - * stop writing out values. 
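/*
 * The new "gpubusy" attribute above prints two counters for the last
 * accounting window, "<busy-us> <wall-us>", so a userspace reader only has
 * to divide them.  The sysfs path below depends on the device name and is an
 * assumption for illustration, not something this patch defines.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/kgsl/kgsl-3d0/gpubusy"; /* assumed path */
	long on_time, total;
	FILE *f = fopen(path, "r");

	if (!f || fscanf(f, "%ld %ld", &on_time, &total) != 2) {
		if (f)
			fclose(f);
		return 1;
	}
	fclose(f);

	if (total > 0)
		printf("GPU busy: %.1f%%\n", 100.0 * on_time / total);
	return 0;
}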
*/ - if (pwr->active_pwrlevel) - pwr->no_switch_cnt = 0; - else if (pwr->no_switch_cnt > SWITCH_OFF) - return; - pwr->no_switch_cnt++; - val = kgsl_pwrctrl_tz_update(stats.total_time - stats.busy_time); - if (val) - kgsl_pwrctrl_pwrlevel_change(device, - pwr->active_pwrlevel + val); + struct kgsl_busy *b = &device->pwrctrl.busy; + int elapsed; + if (b->start.tv_sec == 0) + do_gettimeofday(&(b->start)); + do_gettimeofday(&(b->stop)); + elapsed = (b->stop.tv_sec - b->start.tv_sec) * 1000000; + elapsed += b->stop.tv_usec - b->start.tv_usec; + b->time += elapsed; + if (on_time) + b->on_time += elapsed; + /* Update the output regularly and reset the counters. */ + if ((b->time > UPDATE_BUSY_VAL) || + !test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + b->on_time_old = b->on_time; + b->time_old = b->time; + b->on_time = 0; + b->time = 0; + } + do_gettimeofday(&(b->start)); } void kgsl_pwrctrl_clk(struct kgsl_device *device, int state) @@ -363,6 +315,7 @@ void kgsl_pwrctrl_clk(struct kgsl_device *device, int state) clk_set_rate(pwr->grp_clks[0], pwr->pwrlevels[pwr->num_pwrlevels - 1]. gpu_freq); + kgsl_pwrctrl_busy_time(device, true); } } else if (state == KGSL_PWRFLAGS_ON) { if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON, @@ -381,6 +334,7 @@ void kgsl_pwrctrl_clk(struct kgsl_device *device, int state) for (i = KGSL_MAX_CLKS - 1; i > 0; i--) if (pwr->grp_clks[i]) clk_enable(pwr->grp_clks[i]); + kgsl_pwrctrl_busy_time(device, false); } } } @@ -399,9 +353,6 @@ void kgsl_pwrctrl_axi(struct kgsl_device *device, int state) clk_set_rate(pwr->ebi1_clk, 0); clk_disable(pwr->ebi1_clk); } - if (pwr->pcl) - msm_bus_scale_client_update_request(pwr->pcl, - 0); } } else if (state == KGSL_PWRFLAGS_ON) { if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON, @@ -414,10 +365,6 @@ void kgsl_pwrctrl_axi(struct kgsl_device *device, int state) pwr->pwrlevels[pwr->active_pwrlevel]. bus_freq); } - if (pwr->pcl) - msm_bus_scale_client_update_request(pwr->pcl, - pwr->pwrlevels[pwr->active_pwrlevel]. - bus_freq); } } } @@ -458,13 +405,18 @@ void kgsl_pwrctrl_irq(struct kgsl_device *device, int state) KGSL_PWR_INFO(device, "irq on, device %d\n", device->id); enable_irq(pwr->interrupt_num); + device->ftbl->irqctrl(device, 1); } } else if (state == KGSL_PWRFLAGS_OFF) { if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, &pwr->power_flags)) { KGSL_PWR_INFO(device, "irq off, device %d\n", device->id); - disable_irq(pwr->interrupt_num); + device->ftbl->irqctrl(device, 0); + if (in_interrupt()) + disable_irq_nosync(pwr->interrupt_num); + else + disable_irq(pwr->interrupt_num); } } } @@ -533,9 +485,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) pwr->power_flags = 0; pwr->nap_allowed = pdata_pwr->nap_allowed; -/* drewis: below was removed at some point before i cherry-picked the below commit */ - pwr->idle_pass = pdata_pwr->idle_pass; -/*dc14311... msm: kgsl: Replace internal_power_rail API calls with regulator APIs*/ pwr->interval_timeout = pdata_pwr->idle_timeout; pwr->ebi1_clk = clk_get(NULL, "ebi1_kgsl_clk"); if (IS_ERR(pwr->ebi1_clk)) @@ -544,19 +493,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) clk_set_rate(pwr->ebi1_clk, pwr->pwrlevels[pwr->active_pwrlevel]. bus_freq); - if (pdata_dev->clk.bus_scale_table != NULL) { - pwr->pcl = - msm_bus_scale_register_client(pdata_dev->clk. 
- bus_scale_table); - if (!pwr->pcl) { - KGSL_PWR_ERR(device, - "msm_bus_scale_register_client failed: " - "id %d table %p", device->id, - pdata_dev->clk.bus_scale_table); - result = -EINVAL; - goto done; - } - } /*acquire interrupt */ pwr->interrupt_num = @@ -568,6 +504,8 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) result = -EINVAL; goto done; } + + register_early_suspend(&device->display_off); return result; clk_err: @@ -586,6 +524,8 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) KGSL_PWR_INFO(device, "close device %d\n", device->id); + unregister_early_suspend(&device->display_off); + if (pwr->interrupt_num > 0) { if (pwr->have_irq) { free_irq(pwr->interrupt_num, NULL); @@ -596,8 +536,6 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) clk_put(pwr->ebi1_clk); - if (pwr->pcl) - msm_bus_scale_unregister_client(pwr->pcl); pwr->pcl = 0; @@ -622,15 +560,22 @@ void kgsl_idle_check(struct work_struct *work) idle_check_ws); mutex_lock(&device->mutex); - if ((device->pwrctrl.idle_pass) && - (device->requested_state != KGSL_STATE_SLEEP)) - kgsl_pwrctrl_idle_calc(device); - if (device->state & (KGSL_STATE_ACTIVE | KGSL_STATE_NAP)) { - if (kgsl_pwrctrl_sleep(device) != 0) + if (device->requested_state != KGSL_STATE_SLEEP) + kgsl_pwrscale_idle(device); + + if (kgsl_pwrctrl_sleep(device) != 0) { mod_timer(&device->idle_timer, jiffies + device->pwrctrl.interval_timeout); + /* If the GPU has been too busy to sleep, make sure * + * that is acurately reflected in the % busy numbers. */ + device->pwrctrl.busy.no_nap_cnt++; + if (device->pwrctrl.busy.no_nap_cnt > UPDATE_BUSY) { + kgsl_pwrctrl_busy_time(device, true); + device->pwrctrl.busy.no_nap_cnt = 0; + } + } } else if (device->state & (KGSL_STATE_HUNG | KGSL_STATE_DUMP_AND_RECOVER)) { device->requested_state = KGSL_STATE_NONE; @@ -684,11 +629,11 @@ int kgsl_pwrctrl_sleep(struct kgsl_device *device) /* Work through the legal state transitions */ if (device->requested_state == KGSL_STATE_NAP) { - if (device->ftbl.device_isidle(device)) + if (device->ftbl->isidle(device)) goto nap; } else if (device->requested_state == KGSL_STATE_SLEEP) { if (device->state == KGSL_STATE_NAP || - device->ftbl.device_isidle(device)) + device->ftbl->isidle(device)) goto sleep; } @@ -702,9 +647,10 @@ sleep: clk_set_rate(pwr->grp_clks[0], pwr->pwrlevels[pwr->num_pwrlevels - 1]. gpu_freq); - device->pwrctrl.no_switch_cnt = 0; + kgsl_pwrctrl_busy_time(device, false); + pwr->busy.start.tv_sec = 0; device->pwrctrl.time = 0; - kgsl_pwrctrl_tz_reset(); + goto clk_off; nap: @@ -730,11 +676,9 @@ void kgsl_pwrctrl_wake(struct kgsl_device *device) return; if (device->state != KGSL_STATE_NAP) { - if (device->pwrctrl.idle_pass) - kgsl_pwrctrl_pwrlevel_change(device, - device->pwrctrl.thermal_pwrlevel); kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); } + /* Turn on the core clocks */ kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index 108cc309..97a428df 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -1,53 +1,48 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __KGSL_PWRCTRL_H #define __KGSL_PWRCTRL_H +#include + /***************************************************************************** ** power flags *****************************************************************************/ -#define KGSL_PWRFLAGS_POWER_ON 0 -#define KGSL_PWRFLAGS_CLK_ON 1 -#define KGSL_PWRFLAGS_AXI_ON 2 -#define KGSL_PWRFLAGS_IRQ_ON 3 - #define KGSL_PWRFLAGS_ON 1 #define KGSL_PWRFLAGS_OFF 0 -#define KGSL_DEFAULT_PWRLEVEL 1 +#define KGSL_PWRLEVEL_TURBO 0 +#define KGSL_PWRLEVEL_NOMINAL 1 +#define KGSL_PWRLEVEL_LOW_OFFSET 2 + #define KGSL_MAX_CLKS 5 struct platform_device; +struct kgsl_busy { + struct timeval start; + struct timeval stop; + int on_time; + int time; + int on_time_old; + int time_old; + unsigned int no_nap_cnt; +}; + struct kgsl_pwrctrl { int interrupt_num; int have_irq; + unsigned int pwr_rail; struct clk *ebi1_clk; struct clk *grp_clks[KGSL_MAX_CLKS]; unsigned long power_flags; @@ -59,13 +54,11 @@ struct kgsl_pwrctrl { struct regulator *gpu_reg; uint32_t pcl; unsigned int nap_allowed; - struct adreno_context *suspended_ctxt; const char *regulator_name; const char *irq_name; const char *src_clk_name; s64 time; - unsigned int no_switch_cnt; - unsigned int idle_pass; + struct kgsl_busy busy; }; void kgsl_pwrctrl_clk(struct kgsl_device *device, int state); diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c index f2250283..629a83f0 100644 --- a/drivers/gpu/msm/kgsl_pwrscale.c +++ b/drivers/gpu/msm/kgsl_pwrscale.c @@ -1,4 +1,5 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. 
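/*
 * struct kgsl_busy above backs the gpubusy statistic.  The update rule in
 * kgsl_pwrctrl_busy_time() is: add the elapsed interval to the window, add it
 * to on_time only if the GPU was powered, then publish and reset once the
 * window exceeds UPDATE_BUSY_VAL (1,000,000 us).  A userspace model of the
 * same logic, using gettimeofday in place of do_gettimeofday:
 */
#include <sys/time.h>

#define UPDATE_BUSY_VAL 1000000

struct busy {
	struct timeval start;
	long on_time, time;          /* current window, microseconds */
	long on_time_old, time_old;  /* last published window */
};

void busy_update(struct busy *b, int gpu_was_on)
{
	struct timeval now;
	long elapsed;

	if (b->start.tv_sec == 0)
		gettimeofday(&b->start, NULL);
	gettimeofday(&now, NULL);

	elapsed = (now.tv_sec - b->start.tv_sec) * 1000000L +
		  (now.tv_usec - b->start.tv_usec);
	b->time += elapsed;
	if (gpu_was_on)
		b->on_time += elapsed;

	/* publish and reset the counters roughly once per second of wall time */
	if (b->time > UPDATE_BUSY_VAL) {
		b->on_time_old = b->on_time;
		b->time_old = b->time;
		b->on_time = 0;
		b->time = 0;
	}
	b->start = now;
}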
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -15,6 +16,7 @@ #include "kgsl.h" #include "kgsl_pwrscale.h" +#include "kgsl_device.h" struct kgsl_pwrscale_attribute { struct attribute attr; @@ -38,6 +40,12 @@ __ATTR(_name, _mode, _show, _store) /* Master list of available policies */ static struct kgsl_pwrscale_policy *kgsl_pwrscale_policies[] = { +#ifdef CONFIG_MSM_SCM + &kgsl_pwrscale_policy_tz, +#endif +#ifdef CONFIG_MSM_SLEEP_STATS + &kgsl_pwrscale_policy_idlestats, +#endif NULL }; @@ -141,9 +149,6 @@ static ssize_t policy_sysfs_store(struct kobject *kobj, static void policy_sysfs_release(struct kobject *kobj) { - struct kgsl_pwrscale *pwrscale = to_pwrscale(kobj); - - complete(&pwrscale->kobj_unregister); } static ssize_t pwrscale_sysfs_show(struct kobject *kobj, @@ -181,12 +186,12 @@ static void pwrscale_sysfs_release(struct kobject *kobj) { } -static const struct sysfs_ops policy_sysfs_ops = { +static struct sysfs_ops policy_sysfs_ops = { .show = policy_sysfs_show, .store = policy_sysfs_store }; -static const struct sysfs_ops pwrscale_sysfs_ops = { +static struct sysfs_ops pwrscale_sysfs_ops = { .show = pwrscale_sysfs_show, .store = pwrscale_sysfs_store }; @@ -220,13 +225,17 @@ EXPORT_SYMBOL(kgsl_pwrscale_wake); void kgsl_pwrscale_busy(struct kgsl_device *device) { if (device->pwrscale.policy && device->pwrscale.policy->busy) - device->pwrscale.policy->busy(device, &device->pwrscale); + if (!device->pwrscale.gpu_busy) + device->pwrscale.policy->busy(device, + &device->pwrscale); + device->pwrscale.gpu_busy = 1; } void kgsl_pwrscale_idle(struct kgsl_device *device) { if (device->pwrscale.policy && device->pwrscale.policy->idle) device->pwrscale.policy->idle(device, &device->pwrscale); + device->pwrscale.gpu_busy = 0; } EXPORT_SYMBOL(kgsl_pwrscale_idle); @@ -236,12 +245,8 @@ int kgsl_pwrscale_policy_add_files(struct kgsl_device *device, { int ret; - init_completion(&pwrscale->kobj_unregister); - - ret = kobject_init_and_add(&pwrscale->kobj, - &ktype_pwrscale_policy, - &device->pwrscale_kobj, - "%s", pwrscale->policy->name); + ret = kobject_add(&pwrscale->kobj, &device->pwrscale_kobj, + "%s", pwrscale->policy->name); if (ret) return ret; @@ -249,8 +254,8 @@ int kgsl_pwrscale_policy_add_files(struct kgsl_device *device, ret = sysfs_create_group(&pwrscale->kobj, attr_group); if (ret) { + kobject_del(&pwrscale->kobj); kobject_put(&pwrscale->kobj); - wait_for_completion(&pwrscale->kobj_unregister); } return ret; @@ -261,16 +266,24 @@ void kgsl_pwrscale_policy_remove_files(struct kgsl_device *device, struct attribute_group *attr_group) { sysfs_remove_group(&pwrscale->kobj, attr_group); + kobject_del(&pwrscale->kobj); kobject_put(&pwrscale->kobj); - wait_for_completion(&pwrscale->kobj_unregister); +} + +static void _kgsl_pwrscale_detach_policy(struct kgsl_device *device) +{ + if (device->pwrscale.policy != NULL) { + device->pwrscale.policy->close(device, &device->pwrscale); + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.thermal_pwrlevel); + } + device->pwrscale.policy = NULL; } void kgsl_pwrscale_detach_policy(struct kgsl_device *device) { mutex_lock(&device->mutex); - if (device->pwrscale.policy != NULL) - device->pwrscale.policy->close(device, &device->pwrscale); - device->pwrscale.policy = NULL; + _kgsl_pwrscale_detach_policy(device); mutex_unlock(&device->mutex); } EXPORT_SYMBOL(kgsl_pwrscale_detach_policy); @@ -278,16 +291,25 @@ EXPORT_SYMBOL(kgsl_pwrscale_detach_policy); int 
kgsl_pwrscale_attach_policy(struct kgsl_device *device, struct kgsl_pwrscale_policy *policy) { - int ret; - - if (device->pwrscale.policy != NULL) - kgsl_pwrscale_detach_policy(device); + int ret = 0; mutex_lock(&device->mutex); + + if (device->pwrscale.policy == policy) + goto done; + + if (device->pwrscale.policy != NULL) + _kgsl_pwrscale_detach_policy(device); + device->pwrscale.policy = policy; - ret = device->pwrscale.policy->init(device, &device->pwrscale); - if (ret) - device->pwrscale.policy = NULL; + + if (policy) { + ret = device->pwrscale.policy->init(device, &device->pwrscale); + if (ret) + device->pwrscale.policy = NULL; + } + +done: mutex_unlock(&device->mutex); return ret; @@ -296,8 +318,16 @@ EXPORT_SYMBOL(kgsl_pwrscale_attach_policy); int kgsl_pwrscale_init(struct kgsl_device *device) { - return kobject_init_and_add(&device->pwrscale_kobj, &ktype_pwrscale, - &device->dev->kobj, "pwrscale"); + int ret; + + ret = kobject_init_and_add(&device->pwrscale_kobj, &ktype_pwrscale, + &device->dev->kobj, "pwrscale"); + + if (ret) + return ret; + + kobject_init(&device->pwrscale.kobj, &ktype_pwrscale_policy); + return ret; } EXPORT_SYMBOL(kgsl_pwrscale_init); diff --git a/drivers/gpu/msm/kgsl_pwrscale.h b/drivers/gpu/msm/kgsl_pwrscale.h index f05adbd5..b4f831ee 100644 --- a/drivers/gpu/msm/kgsl_pwrscale.h +++ b/drivers/gpu/msm/kgsl_pwrscale.h @@ -1,29 +1,13 @@ /* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
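/*
 * kgsl_pwrscale_attach_policy() above is careful to be idempotent (attaching
 * the policy that is already active is a no-op) and to close the old policy
 * before installing the new one.  A stripped-down model of that contract,
 * without the kobject and mutex plumbing; names are illustrative only.
 */
#include <stddef.h>

struct policy {
	int  (*init)(void);
	void (*close)(void);
};

static const struct policy *active;

int attach_policy(const struct policy *p)
{
	int ret = 0;

	if (active == p)          /* already attached: nothing to do */
		return 0;

	if (active)               /* tear down the previous policy first */
		active->close();

	active = p;
	if (p) {
		ret = p->init();
		if (ret)
			active = NULL;  /* failed init leaves no policy attached */
	}
	return ret;
}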
* */ @@ -51,8 +35,8 @@ struct kgsl_pwrscale_policy { struct kgsl_pwrscale { struct kgsl_pwrscale_policy *policy; struct kobject kobj; - struct completion kobj_unregister; void *priv; + int gpu_busy; }; struct kgsl_pwrscale_policy_attribute { @@ -68,8 +52,12 @@ struct kgsl_pwrscale_policy_attribute { struct kgsl_pwrscale_policy_attribute policy_attr_##_name = \ __ATTR(_name, _mode, _show, _store) +extern struct kgsl_pwrscale_policy kgsl_pwrscale_policy_tz; +extern struct kgsl_pwrscale_policy kgsl_pwrscale_policy_idlestats; + int kgsl_pwrscale_init(struct kgsl_device *device); void kgsl_pwrscale_close(struct kgsl_device *device); + int kgsl_pwrscale_attach_policy(struct kgsl_device *device, struct kgsl_pwrscale_policy *policy); void kgsl_pwrscale_detach_policy(struct kgsl_device *device); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 36ff19c7..a587c44a 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -1,4 +1,5 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,11 +12,14 @@ * */ #include +#include #include #include "kgsl.h" #include "kgsl_sharedmem.h" #include "kgsl_cffdump.h" +#include "kgsl_device.h" +#include "adreno_ringbuffer.h" static struct kgsl_process_private * _get_priv_from_kobj(struct kobject *kobj) @@ -166,7 +170,7 @@ DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); -static const struct device_attribute *drv_attr_list[] = { +static struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, &dev_attr_vmalloc_max, &dev_attr_coherent, @@ -205,28 +209,21 @@ static void _outer_cache_range_op(int op, unsigned long addr, size_t size) break; } } -#endif -static unsigned long kgsl_vmalloc_physaddr(struct kgsl_memdesc *memdesc, - unsigned int offset) +static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) { - unsigned int addr; + struct scatterlist *s; + int i; - if (offset > memdesc->size) - return 0; - - addr = vmalloc_to_pfn(memdesc->hostptr + offset); - return addr << PAGE_SHIFT; + for_each_sg(sg, s, sglen, i) { + unsigned int paddr = sg_phys(s); + _outer_cache_range_op(op, paddr, s->length); + } } -#ifdef CONFIG_OUTER_CACHE -static void kgsl_vmalloc_outer_cache(struct kgsl_memdesc *memdesc, int op) +#else +static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) { - void *vaddr = memdesc->hostptr; - for (; vaddr < (memdesc->hostptr + memdesc->size); vaddr += PAGE_SIZE) { - unsigned long paddr = page_to_phys(vmalloc_to_page(vaddr)); - _outer_cache_range_op(op, paddr, PAGE_SIZE); - } } #endif @@ -261,6 +258,42 @@ static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) vfree(memdesc->hostptr); } +static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) +{ + return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; +} + +static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long offset, pfn; + int ret; + + offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >> + PAGE_SHIFT; + + pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset; + ret = vm_insert_pfn(vma, (unsigned long) vmf->virtual_address, pfn); + + if 
(ret == -ENOMEM || ret == -EAGAIN) + return VM_FAULT_OOM; + else if (ret == -EFAULT) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static void kgsl_ebimem_free(struct kgsl_memdesc *memdesc) + +{ + kgsl_driver.stats.coherent -= memdesc->size; + if (memdesc->hostptr) + iounmap(memdesc->hostptr); + + free_contiguous_memory_by_paddr(memdesc->physaddr); +} + static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) { kgsl_driver.stats.coherent -= memdesc->size; @@ -268,78 +301,24 @@ static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) memdesc->hostptr, memdesc->physaddr); } -static unsigned long kgsl_contig_physaddr(struct kgsl_memdesc *memdesc, - unsigned int offset) -{ - if (offset > memdesc->size) - return 0; - - return memdesc->physaddr + offset; -} - -#ifdef CONFIG_OUTER_CACHE -static void kgsl_contig_outer_cache(struct kgsl_memdesc *memdesc, int op) -{ - _outer_cache_range_op(op, memdesc->physaddr, memdesc->size); -} -#endif - -#ifdef CONFIG_OUTER_CACHE -static void kgsl_userptr_outer_cache(struct kgsl_memdesc *memdesc, int op) -{ - void *vaddr = memdesc->hostptr; - for (; vaddr < (memdesc->hostptr + memdesc->size); vaddr += PAGE_SIZE) { - unsigned long paddr = kgsl_virtaddr_to_physaddr(vaddr); - if (paddr) - _outer_cache_range_op(op, paddr, PAGE_SIZE); - } -} -#endif - -static unsigned long kgsl_userptr_physaddr(struct kgsl_memdesc *memdesc, - unsigned int offset) -{ - return kgsl_virtaddr_to_physaddr(memdesc->hostptr + offset); -} - /* Global - also used by kgsl_drm.c */ struct kgsl_memdesc_ops kgsl_vmalloc_ops = { - .physaddr = kgsl_vmalloc_physaddr, .free = kgsl_vmalloc_free, .vmflags = kgsl_vmalloc_vmflags, .vmfault = kgsl_vmalloc_vmfault, -#ifdef CONFIG_OUTER_CACHE - .outer_cache = kgsl_vmalloc_outer_cache, -#endif }; EXPORT_SYMBOL(kgsl_vmalloc_ops); +static struct kgsl_memdesc_ops kgsl_ebimem_ops = { + .free = kgsl_ebimem_free, + .vmflags = kgsl_contiguous_vmflags, + .vmfault = kgsl_contiguous_vmfault, +}; + static struct kgsl_memdesc_ops kgsl_coherent_ops = { - .physaddr = kgsl_contig_physaddr, .free = kgsl_coherent_free, -#ifdef CONFIG_OUTER_CACHE - .outer_cache = kgsl_contig_outer_cache, -#endif }; -/* Global - also used by kgsl.c and kgsl_drm.c */ -struct kgsl_memdesc_ops kgsl_contig_ops = { - .physaddr = kgsl_contig_physaddr, -#ifdef CONFIG_OUTER_CACHE - .outer_cache = kgsl_contig_outer_cache -#endif -}; -EXPORT_SYMBOL(kgsl_contig_ops); - -/* Global - also used by kgsl.c */ -struct kgsl_memdesc_ops kgsl_userptr_ops = { - .physaddr = kgsl_userptr_physaddr, -#ifdef CONFIG_OUTER_CACHE - .outer_cache = kgsl_userptr_outer_cache, -#endif -}; -EXPORT_SYMBOL(kgsl_userptr_ops); - void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) { void *addr = memdesc->hostptr; @@ -357,8 +336,7 @@ void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) break; } - if (memdesc->ops->outer_cache) - memdesc->ops->outer_cache(memdesc, op); + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, op); } EXPORT_SYMBOL(kgsl_cache_range_op); @@ -367,7 +345,9 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, void *ptr, size_t size, unsigned int protflags) { - int result; + int order, ret = 0; + int sglen = PAGE_ALIGN(size) / PAGE_SIZE; + int i; memdesc->size = size; memdesc->pagetable = pagetable; @@ -375,25 +355,44 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->ops = &kgsl_vmalloc_ops; memdesc->hostptr = (void *) ptr; - kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); - - result = kgsl_mmu_map(pagetable, 
memdesc, protflags); - - if (result) { - kgsl_sharedmem_free(memdesc); - } else { - int order; - - KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, - kgsl_driver.stats.vmalloc_max); - - order = get_order(size); - - if (order < 16) - kgsl_driver.stats.histogram[order]++; + memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + if (memdesc->sg == NULL) { + ret = -ENOMEM; + goto done; } - return result; + memdesc->sglen = sglen; + sg_init_table(memdesc->sg, sglen); + + for (i = 0; i < memdesc->sglen; i++, ptr += PAGE_SIZE) { + struct page *page = vmalloc_to_page(ptr); + if (!page) { + ret = -EINVAL; + goto done; + } + sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); + } + + kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); + + ret = kgsl_mmu_map(pagetable, memdesc, protflags); + + if (ret) + goto done; + + KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, + kgsl_driver.stats.vmalloc_max); + + order = get_order(size); + + if (order < 16) + kgsl_driver.stats.histogram[order]++; + +done: + if (ret) + kgsl_sharedmem_free(memdesc); + + return ret; } int @@ -446,24 +445,35 @@ EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size) { + int result = 0; + size = ALIGN(size, PAGE_SIZE); + memdesc->size = size; + memdesc->ops = &kgsl_coherent_ops; + memdesc->hostptr = dma_alloc_coherent(NULL, size, &memdesc->physaddr, GFP_KERNEL); if (memdesc->hostptr == NULL) { KGSL_CORE_ERR("dma_alloc_coherent(%d) failed\n", size); - return -ENOMEM; + result = -ENOMEM; + goto err; } - memdesc->size = size; - memdesc->ops = &kgsl_coherent_ops; + result = memdesc_sg_phys(memdesc, memdesc->physaddr, size); + if (result) + goto err; /* Record statistics */ KGSL_STATS_ADD(size, kgsl_driver.stats.coherent, kgsl_driver.stats.coherent_max); - return 0; +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; } EXPORT_SYMBOL(kgsl_sharedmem_alloc_coherent); @@ -475,13 +485,86 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->gpuaddr) kgsl_mmu_unmap(memdesc->pagetable, memdesc); - if (memdesc->ops->free) + if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); + kfree(memdesc->sg); + memset(memdesc, 0, sizeof(*memdesc)); } EXPORT_SYMBOL(kgsl_sharedmem_free); +static int +_kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, size_t size) +{ + int result = 0; + + memdesc->size = size; + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_ebimem_ops; + memdesc->physaddr = allocate_contiguous_ebi_nomap(size, SZ_8K); + + if (memdesc->physaddr == 0) { + KGSL_CORE_ERR("allocate_contiguous_ebi_nomap(%d) failed\n", + size); + return -ENOMEM; + } + + result = memdesc_sg_phys(memdesc, memdesc->physaddr, size); + + if (result) + goto err; + + result = kgsl_mmu_map(pagetable, memdesc, + GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + + if (result) + goto err; + + KGSL_STATS_ADD(size, kgsl_driver.stats.coherent, + kgsl_driver.stats.coherent_max); + +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; +} + +int +kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + size_t size, int flags) +{ + size = ALIGN(size, PAGE_SIZE); + return _kgsl_sharedmem_ebimem(memdesc, pagetable, size); +} +EXPORT_SYMBOL(kgsl_sharedmem_ebimem_user); + +int +kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, size_t size) +{ + int result; + size = ALIGN(size, 8192); + result = _kgsl_sharedmem_ebimem(memdesc, pagetable, 
size); + + if (result) + return result; + + memdesc->hostptr = ioremap(memdesc->physaddr, size); + + if (memdesc->hostptr == NULL) { + KGSL_CORE_ERR("ioremap failed\n"); + kgsl_sharedmem_free(memdesc); + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_ebimem); + int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, uint32_t *dst, diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index d0070584..61bcf05b 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -1,37 +1,27 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
* */ #ifndef __KGSL_SHAREDMEM_H #define __KGSL_SHAREDMEM_H +#include #include -struct kgsl_pagetable; +/* + * Convert a page to a physical address + */ +#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) + struct kgsl_device; struct kgsl_process_private; @@ -42,31 +32,14 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 -struct kgsl_memdesc; - struct kgsl_memdesc_ops { - unsigned long (*physaddr)(struct kgsl_memdesc *, unsigned int); - void (*outer_cache)(struct kgsl_memdesc *, int); int (*vmflags)(struct kgsl_memdesc *); int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, struct vm_fault *); void (*free)(struct kgsl_memdesc *memdesc); }; -/* shared memory allocation */ -struct kgsl_memdesc { - struct kgsl_pagetable *pagetable; - void *hostptr; - unsigned int gpuaddr; - unsigned int physaddr; - unsigned int size; - unsigned int priv; - struct kgsl_memdesc_ops *ops; -}; - extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; -extern struct kgsl_memdesc_ops kgsl_contig_ops; -extern struct kgsl_memdesc_ops kgsl_userptr_ops; int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size); @@ -77,6 +50,14 @@ int kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size); +int kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + size_t size, int flags); + +int kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + size_t size); + void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc); int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, @@ -99,18 +80,54 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +static inline int +memdesc_sg_phys(struct kgsl_memdesc *memdesc, + unsigned int physaddr, unsigned int size) +{ + struct page *page = phys_to_page(physaddr); + + memdesc->sg = kmalloc(sizeof(struct scatterlist) * 1, GFP_KERNEL); + if (memdesc->sg == NULL) + return -ENOMEM; + + memdesc->sglen = 1; + sg_init_table(memdesc->sg, 1); + sg_set_page(&memdesc->sg[0], page, size, 0); + return 0; +} + +static inline int +kgsl_allocate(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, size_t size) +{ +#ifdef CONFIG_MSM_KGSL_MMU + return kgsl_sharedmem_vmalloc(memdesc, pagetable, size); +#else + return kgsl_sharedmem_ebimem(memdesc, pagetable, size); +#endif +} + static inline int kgsl_allocate_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int flags) { +#ifdef CONFIG_MSM_KGSL_MMU return kgsl_sharedmem_vmalloc_user(memdesc, pagetable, size, flags); +#else + return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size, flags); +#endif } static inline int -kgsl_allocate_contig(struct kgsl_memdesc *memdesc, size_t size) +kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) { - return kgsl_sharedmem_alloc_coherent(memdesc, size); + int ret = kgsl_sharedmem_alloc_coherent(memdesc, size); +#ifndef CONFIG_MSM_KGSL_MMU + if (!ret) + memdesc->gpuaddr = memdesc->physaddr; +#endif + return ret; } #endif /* __KGSL_SHAREDMEM_H */ diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c index 27da432e..e7a1d521 100644 --- a/drivers/gpu/msm/z180.c +++ b/drivers/gpu/msm/z180.c @@ -14,6 +14,7 @@ #include "kgsl.h" #include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" 
#include "z180.h" #include "z180_reg.h" @@ -86,6 +87,11 @@ #define Z180_TIMESTAMP_EPSILON 20000 #define Z180_IDLE_COUNT_MAX 1000000 +enum z180_cmdwindow_type { + Z180_CMDWINDOW_2D = 0x00000000, + Z180_CMDWINDOW_MMU = 0x00000002, +}; + #define Z180_CMDWINDOW_TARGET_MASK 0x000000FF #define Z180_CMDWINDOW_ADDR_MASK 0x00FFFF00 #define Z180_CMDWINDOW_TARGET_SHIFT 0 @@ -102,17 +108,9 @@ static void z180_regread(struct kgsl_device *device, static void z180_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); -static int z180_cmdwindow_write(struct kgsl_device *device, - enum kgsl_cmdwindow_type target, +static void z180_cmdwindow_write(struct kgsl_device *device, unsigned int addr, unsigned int data); -static void z180_regread_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value); -static void z180_regwrite_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value); -static void __devinit z180_getfunctable(struct kgsl_functable *ftbl); #define Z180_MMU_CONFIG \ (0x01 \ @@ -128,35 +126,29 @@ static void __devinit z180_getfunctable(struct kgsl_functable *ftbl); | (MMU_CONFIG << MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT) \ | (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT)) +static const struct kgsl_functable z180_functable; + static struct z180_device device_2d0 = { .dev = { .name = DEVICE_2D0_NAME, .id = KGSL_DEVICE_2D0, .ver_major = DRIVER_VERSION_MAJOR, .ver_minor = DRIVER_VERSION_MINOR, - .mmu = { - .config = Z180_MMU_CONFIG, + .mh = { + .mharb = Z180_CFG_MHARB, + .mh_intf_cfg1 = 0x00032f07, + .mh_intf_cfg2 = 0x004b274f, /* turn off memory protection unit by setting acceptable physical address range to include all pages. */ .mpu_base = 0x00000000, .mpu_range = 0xFFFFF000, - .reg = { - .config = ADDR_MH_MMU_CONFIG, - .mpu_base = ADDR_MH_MMU_MPU_BASE, - .mpu_end = ADDR_MH_MMU_MPU_END, - .va_range = ADDR_MH_MMU_VA_RANGE, - .pt_page = ADDR_MH_MMU_PT_BASE, - .page_fault = ADDR_MH_MMU_PAGE_FAULT, - .tran_error = ADDR_MH_MMU_TRAN_ERROR, - .invalidate = ADDR_MH_MMU_INVALIDATE, - .interrupt_mask = ADDR_MH_INTERRUPT_MASK, - .interrupt_status = ADDR_MH_INTERRUPT_STATUS, - .interrupt_clear = ADDR_MH_INTERRUPT_CLEAR, - .axi_error = ADDR_MH_AXI_ERROR, - }, + }, + .mmu = { + .config = Z180_MMU_CONFIG, }, .pwrctrl = { + .pwr_rail = PWR_RAIL_GRP_2D_CLK, .regulator_name = "fs_gfx2d0", .irq_name = KGSL_2D0_IRQ, }, @@ -164,6 +156,14 @@ static struct z180_device device_2d0 = { .state = KGSL_STATE_INIT, .active_cnt = 0, .iomemname = KGSL_2D0_REG_MEMORY, + .ftbl = &z180_functable, +#ifdef CONFIG_HAS_EARLYSUSPEND + .display_off = { + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = kgsl_early_suspend_driver, + .resume = kgsl_late_resume_driver, + }, +#endif }, }; @@ -173,29 +173,21 @@ static struct z180_device device_2d1 = { .id = KGSL_DEVICE_2D1, .ver_major = DRIVER_VERSION_MAJOR, .ver_minor = DRIVER_VERSION_MINOR, - .mmu = { - .config = Z180_MMU_CONFIG, + .mh = { + .mharb = Z180_CFG_MHARB, + .mh_intf_cfg1 = 0x00032f07, + .mh_intf_cfg2 = 0x004b274f, /* turn off memory protection unit by setting acceptable physical address range to include all pages. 
*/ .mpu_base = 0x00000000, .mpu_range = 0xFFFFF000, - .reg = { - .config = ADDR_MH_MMU_CONFIG, - .mpu_base = ADDR_MH_MMU_MPU_BASE, - .mpu_end = ADDR_MH_MMU_MPU_END, - .va_range = ADDR_MH_MMU_VA_RANGE, - .pt_page = ADDR_MH_MMU_PT_BASE, - .page_fault = ADDR_MH_MMU_PAGE_FAULT, - .tran_error = ADDR_MH_MMU_TRAN_ERROR, - .invalidate = ADDR_MH_MMU_INVALIDATE, - .interrupt_mask = ADDR_MH_INTERRUPT_MASK, - .interrupt_status = ADDR_MH_INTERRUPT_STATUS, - .interrupt_clear = ADDR_MH_INTERRUPT_CLEAR, - .axi_error = ADDR_MH_AXI_ERROR, - }, + }, + .mmu = { + .config = Z180_MMU_CONFIG, }, .pwrctrl = { + .pwr_rail = PWR_RAIL_GRP_2D_CLK, .regulator_name = "fs_gfx2d1", .irq_name = KGSL_2D1_IRQ, }, @@ -203,6 +195,14 @@ static struct z180_device device_2d1 = { .state = KGSL_STATE_INIT, .active_cnt = 0, .iomemname = KGSL_2D1_REG_MEMORY, + .ftbl = &z180_functable, + .display_off = { +#ifdef CONFIG_HAS_EARLYSUSPEND + .level = EARLY_SUSPEND_LEVEL_STOP_DRAWING, + .suspend = kgsl_early_suspend_driver, + .resume = kgsl_late_resume_driver, +#endif + }, }, }; @@ -213,10 +213,10 @@ static irqreturn_t z180_isr(int irq, void *data) struct kgsl_device *device = (struct kgsl_device *) data; struct z180_device *z180_dev = Z180_DEVICE(device); - z180_regread_isr(device, ADDR_VGC_IRQSTATUS >> 2, &status); + z180_regread(device, ADDR_VGC_IRQSTATUS >> 2, &status); if (status & GSL_VGC_INT_MASK) { - z180_regwrite_isr(device, + z180_regwrite(device, ADDR_VGC_IRQSTATUS >> 2, status & GSL_VGC_INT_MASK); result = IRQ_HANDLED; @@ -228,7 +228,7 @@ static irqreturn_t z180_isr(int irq, void *data) if (status & REG_VGC_IRQSTATUS__G2D_MASK) { int count; - z180_regread_isr(device, + z180_regread(device, ADDR_VGC_IRQ_ACTIVE_CNT >> 2, &count); @@ -236,6 +236,7 @@ static irqreturn_t z180_isr(int irq, void *data) count &= 255; z180_dev->timestamp += count; + queue_work(device->work_queue, &device->ts_expired_ws); wake_up_interruptible(&device->wait_queue); atomic_notifier_call_chain( @@ -255,18 +256,16 @@ static irqreturn_t z180_isr(int irq, void *data) return result; } -static int z180_cleanup_pt(struct kgsl_device *device, +static void z180_cleanup_pt(struct kgsl_device *device, struct kgsl_pagetable *pagetable) { struct z180_device *z180_dev = Z180_DEVICE(device); - kgsl_mmu_unmap(pagetable, &device->mmu.dummyspace); + kgsl_mmu_unmap(pagetable, &device->mmu.setstate_memory); kgsl_mmu_unmap(pagetable, &device->memstore); kgsl_mmu_unmap(pagetable, &z180_dev->ringbuffer.cmdbufdesc); - - return 0; } static int z180_setup_pt(struct kgsl_device *device, @@ -275,7 +274,7 @@ static int z180_setup_pt(struct kgsl_device *device, int result = 0; struct z180_device *z180_dev = Z180_DEVICE(device); - result = kgsl_mmu_map_global(pagetable, &device->mmu.dummyspace, + result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); if (result) @@ -294,7 +293,7 @@ static int z180_setup_pt(struct kgsl_device *device, return result; error_unmap_dummy: - kgsl_mmu_unmap(pagetable, &device->mmu.dummyspace); + kgsl_mmu_unmap(pagetable, &device->mmu.setstate_memory); error_unmap_memstore: kgsl_mmu_unmap(pagetable, &device->memstore); @@ -339,10 +338,9 @@ static void addcmd(struct z180_ringbuffer *rb, unsigned int index, *p++ = ADDR_VGV3_LAST << 24; } -static int z180_cmdstream_start(struct kgsl_device *device) +static void z180_cmdstream_start(struct kgsl_device *device) { struct z180_device *z180_dev = Z180_DEVICE(device); - int result; unsigned int cmd = VGV3_NEXTCMD_JUMP << VGV3_NEXTCMD_NEXTCMD_FSHIFT; z180_dev->timestamp 
= 0; @@ -350,43 +348,22 @@ static int z180_cmdstream_start(struct kgsl_device *device) addmarker(&z180_dev->ringbuffer, 0); - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_MODE, 4); - if (result != 0) - return result; + z180_cmdwindow_write(device, ADDR_VGV3_MODE, 4); - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_NEXTADDR, + z180_cmdwindow_write(device, ADDR_VGV3_NEXTADDR, z180_dev->ringbuffer.cmdbufdesc.gpuaddr); - if (result != 0) - return result; - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_NEXTCMD, cmd | 5); - if (result != 0) - return result; + z180_cmdwindow_write(device, ADDR_VGV3_NEXTCMD, cmd | 5); - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_WRITEADDR, device->memstore.gpuaddr); - - if (result != 0) - return result; + z180_cmdwindow_write(device, ADDR_VGV3_WRITEADDR, + device->memstore.gpuaddr); cmd = (int)(((1) & VGV3_CONTROL_MARKADD_FMASK) << VGV3_CONTROL_MARKADD_FSHIFT); - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_CONTROL, cmd); + z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd); - if (result != 0) - return result; - - result = z180_cmdwindow_write(device, KGSL_CMDWINDOW_2D, - ADDR_VGV3_CONTROL, 0); - if (result != 0) - return result; - - return result; + z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0); } static int room_in_rb(struct z180_device *device) @@ -403,7 +380,8 @@ static int z180_idle(struct kgsl_device *device, unsigned int timeout) int status = 0; struct z180_device *z180_dev = Z180_DEVICE(device); - if (z180_dev->current_timestamp > z180_dev->timestamp) + if (timestamp_cmp(z180_dev->current_timestamp, + z180_dev->timestamp) > 0) status = z180_wait(device, z180_dev->current_timestamp, timeout); @@ -413,30 +391,6 @@ static int z180_idle(struct kgsl_device *device, unsigned int timeout) return status; } -static int z180_setstate(struct kgsl_device *device, uint32_t flags) -{ -#ifdef CONFIG_MSM_KGSL_MMU - unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ - - if (flags & KGSL_MMUFLAGS_PTUPDATE) { - z180_idle(device, KGSL_TIMEOUT_DEFAULT); - z180_regwrite(device, ADDR_MH_MMU_PT_BASE, - device->mmu.hwpagetable->base.gpuaddr); - z180_regwrite(device, ADDR_MH_MMU_VA_RANGE, - (device->mmu.hwpagetable-> - va_base | (device->mmu.hwpagetable-> - va_range >> 16))); - z180_regwrite(device, ADDR_MH_MMU_INVALIDATE, - mh_mmu_invalidate); - } - - if (flags & KGSL_MMUFLAGS_TLBFLUSH) - z180_regwrite(device, ADDR_MH_MMU_INVALIDATE, - mh_mmu_invalidate); -#endif - return 0; -} - int z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, @@ -445,7 +399,7 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, uint32_t *timestamp, unsigned int ctrl) { - unsigned int result = 0; + long result = 0; unsigned int ofs = PACKETSIZE_STATESTREAM * sizeof(unsigned int); unsigned int cnt = 5; unsigned int nextaddr = 0; @@ -460,7 +414,7 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int sizedwords; if (device->state & KGSL_STATE_HUNG) { - return -EINVAL; + result = -EINVAL; goto error; } if (numibs != 1) { @@ -484,7 +438,7 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, cnt = PACKETSIZE_STATESTREAM; ofs = 0; } - z180_setstate(device, kgsl_pt_get_flags(device->mmu.hwpagetable, + kgsl_setstate(device, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); result = wait_event_interruptible_timeout(device->wait_queue, @@ -492,7 +446,7 @@ 
z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, msecs_to_jiffies(KGSL_TIMEOUT_DEFAULT)); if (result < 0) { KGSL_CMD_ERR(device, "wait_event_interruptible_timeout " - "failed: %d\n", result); + "failed: %ld\n", result); goto error; } result = 0; @@ -525,12 +479,10 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, cmd = (int)(((2) & VGV3_CONTROL_MARKADD_FMASK) << VGV3_CONTROL_MARKADD_FSHIFT); - z180_cmdwindow_write(device, - KGSL_CMDWINDOW_2D, ADDR_VGV3_CONTROL, cmd); - z180_cmdwindow_write(device, - KGSL_CMDWINDOW_2D, ADDR_VGV3_CONTROL, 0); + z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd); + z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0); error: - return result; + return (int)result; } static int z180_ringbuffer_init(struct kgsl_device *device) @@ -538,9 +490,8 @@ static int z180_ringbuffer_init(struct kgsl_device *device) struct z180_device *z180_dev = Z180_DEVICE(device); memset(&z180_dev->ringbuffer, 0, sizeof(struct z180_ringbuffer)); z180_dev->ringbuffer.prevctx = Z180_INVALID_CONTEXT; - return kgsl_sharedmem_alloc_coherent( - &z180_dev->ringbuffer.cmdbufdesc, - Z180_RB_SIZE); + return kgsl_allocate_contiguous(&z180_dev->ringbuffer.cmdbufdesc, + Z180_RB_SIZE); } static void z180_ringbuffer_close(struct kgsl_device *device) @@ -559,8 +510,6 @@ static int __devinit z180_probe(struct platform_device *pdev) device = (struct kgsl_device *)pdev->id_entry->driver_data; device->parentdev = &pdev->dev; - z180_getfunctable(&device->ftbl); - z180_dev = Z180_DEVICE(device); spin_lock_init(&z180_dev->cmdwin_lock); @@ -572,6 +521,8 @@ static int __devinit z180_probe(struct platform_device *pdev) if (status) goto error_close_ringbuffer; + kgsl_pwrscale_init(device); + return status; error_close_ringbuffer: @@ -587,6 +538,7 @@ static int __devexit z180_remove(struct platform_device *pdev) device = (struct kgsl_device *)pdev->id_entry->driver_data; + kgsl_pwrscale_close(device); kgsl_device_platform_remove(device); z180_ringbuffer_close(device); @@ -604,31 +556,24 @@ static int z180_start(struct kgsl_device *device, unsigned int init_ram) kgsl_pwrctrl_enable(device); - /* Set up MH arbiter. MH offsets are considered to be dword - * based, therefore no down shift. 
*/ - z180_regwrite(device, ADDR_MH_ARBITER_CONFIG, Z180_CFG_MHARB); + /* Set interrupts to 0 to ensure a good state */ + z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 0x0); - z180_regwrite(device, ADDR_MH_CLNT_INTF_CTRL_CONFIG1, 0x00030F27); - z180_regwrite(device, ADDR_MH_CLNT_INTF_CTRL_CONFIG2, 0x004B274F); - - z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 0x3); + kgsl_mh_start(device); status = kgsl_mmu_start(device); if (status) goto error_clk_off; - status = z180_cmdstream_start(device); - if (status) - goto error_mmu_stop; + z180_cmdstream_start(device); mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); - kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_IRQ_ON); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); return 0; + error_clk_off: z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 0); kgsl_pwrctrl_disable(device); -error_mmu_stop: - kgsl_mmu_stop(device); return status; } @@ -636,7 +581,7 @@ static int z180_stop(struct kgsl_device *device) { z180_idle(device, KGSL_TIMEOUT_DEFAULT); - del_timer(&device->idle_timer); + del_timer_sync(&device->idle_timer); kgsl_mmu_stop(device); @@ -680,16 +625,12 @@ static int z180_getproperty(struct kgsl_device *device, break; case KGSL_PROP_MMU_ENABLE: { -#ifdef CONFIG_MSM_KGSL_MMU - int mmuProp = 1; -#else - int mmuProp = 0; -#endif + int mmu_prop = kgsl_mmu_enabled(); if (sizebytes != sizeof(int)) { status = -EINVAL; break; } - if (copy_to_user(value, &mmuProp, sizeof(mmuProp))) { + if (copy_to_user(value, &mmu_prop, sizeof(mmu_prop))) { status = -EFAULT; break; } @@ -706,22 +647,10 @@ static int z180_getproperty(struct kgsl_device *device, static unsigned int z180_isidle(struct kgsl_device *device) { - int status = false; struct z180_device *z180_dev = Z180_DEVICE(device); - int timestamp = z180_dev->timestamp; - - if (timestamp == z180_dev->current_timestamp) - status = true; - - return status; -} - -static int z180_resume_context(struct kgsl_device *device) -{ - /* Context is in the pre-amble, automatically restored. */ - - return 0; + return (timestamp_cmp(z180_dev->timestamp, + z180_dev->current_timestamp) == 0) ? 
true : false; } static int z180_suspend_context(struct kgsl_device *device) @@ -800,7 +729,7 @@ static void _z180_regwrite_mmu(struct kgsl_device *device, unsigned int cmdwinaddr; unsigned long flags; - cmdwinaddr = ((KGSL_CMDWINDOW_MMU << Z180_CMDWINDOW_TARGET_SHIFT) & + cmdwinaddr = ((Z180_CMDWINDOW_MMU << Z180_CMDWINDOW_TARGET_SHIFT) & Z180_CMDWINDOW_TARGET_MASK); cmdwinaddr |= ((offsetwords << Z180_CMDWINDOW_ADDR_SHIFT) & Z180_CMDWINDOW_ADDR_MASK); @@ -815,91 +744,52 @@ static void _z180_regwrite_mmu(struct kgsl_device *device, /* the rest of the code doesn't want to think about if it is writing mmu * registers or normal registers so handle it here */ -static void _z180_regread(struct kgsl_device *device, unsigned int offsetwords, - unsigned int *value) +static void z180_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) { - if ((offsetwords >= ADDR_MH_ARBITER_CONFIG && - offsetwords <= ADDR_MH_AXI_HALT_CONTROL) || - (offsetwords >= ADDR_MH_MMU_CONFIG && - offsetwords <= ADDR_MH_MMU_MPU_END)) { + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + + if ((offsetwords >= MH_ARBITER_CONFIG && + offsetwords <= MH_AXI_HALT_CONTROL) || + (offsetwords >= MH_MMU_CONFIG && + offsetwords <= MH_MMU_MPU_END)) { _z180_regread_mmu(device, offsetwords, value); } else { _z180_regread_simple(device, offsetwords, value); } } -static void _z180_regwrite(struct kgsl_device *device, unsigned int offsetwords, +static void z180_regwrite(struct kgsl_device *device, + unsigned int offsetwords, unsigned int value) { - if ((offsetwords >= ADDR_MH_ARBITER_CONFIG && - offsetwords <= ADDR_MH_CLNT_INTF_CTRL_CONFIG2) || - (offsetwords >= ADDR_MH_MMU_CONFIG && - offsetwords <= ADDR_MH_MMU_MPU_END)) { - _z180_regwrite_mmu(device, offsetwords, value); + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + if ((offsetwords >= MH_ARBITER_CONFIG && + offsetwords <= MH_CLNT_INTF_CTRL_CONFIG2) || + (offsetwords >= MH_MMU_CONFIG && + offsetwords <= MH_MMU_MPU_END)) { + _z180_regwrite_mmu(device, offsetwords, value); } else { _z180_regwrite_simple(device, offsetwords, value); } } - -static void z180_regread(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) -{ - kgsl_pre_hwaccess(device); - _z180_regread(device, offsetwords, value); -} - -static void z180_regread_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int *value) -{ - _z180_regread(device, offsetwords, value); -} - -static void z180_regwrite(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) -{ - kgsl_pre_hwaccess(device); - _z180_regwrite(device, offsetwords, value); -} - -static void z180_regwrite_isr(struct kgsl_device *device, - unsigned int offsetwords, - unsigned int value) -{ - _z180_regwrite(device, offsetwords, value); -} - -static int z180_cmdwindow_write(struct kgsl_device *device, - enum kgsl_cmdwindow_type target, unsigned int addr, - unsigned int data) +static void z180_cmdwindow_write(struct kgsl_device *device, + unsigned int addr, unsigned int data) { unsigned int cmdwinaddr; - unsigned int cmdstream; - if (target < KGSL_CMDWINDOW_MIN || - target > KGSL_CMDWINDOW_MAX) { - KGSL_DRV_ERR(device, "invalid target\n"); - return -EINVAL; - } - - if (target == KGSL_CMDWINDOW_MMU) - cmdstream = ADDR_VGC_MMUCOMMANDSTREAM; - else - cmdstream = ADDR_VGC_COMMANDSTREAM; - - cmdwinaddr = ((target << Z180_CMDWINDOW_TARGET_SHIFT) & + cmdwinaddr = ((Z180_CMDWINDOW_2D << Z180_CMDWINDOW_TARGET_SHIFT) & Z180_CMDWINDOW_TARGET_MASK); cmdwinaddr |= ((addr << 
Z180_CMDWINDOW_ADDR_SHIFT) & Z180_CMDWINDOW_ADDR_MASK); - z180_regwrite(device, cmdstream >> 2, cmdwinaddr); - z180_regwrite(device, cmdstream >> 2, data); - - return 0; + z180_regwrite(device, ADDR_VGC_COMMANDSTREAM >> 2, cmdwinaddr); + z180_regwrite(device, ADDR_VGC_COMMANDSTREAM >> 2, data); } static unsigned int z180_readtimestamp(struct kgsl_device *device, @@ -915,6 +805,11 @@ static int z180_waittimestamp(struct kgsl_device *device, unsigned int msecs) { int status = -EINVAL; + + /* Don't wait forever, set a max (10 sec) value for now */ + if (msecs == -1) + msecs = 10 * MSEC_PER_SEC; + mutex_unlock(&device->mutex); status = z180_wait(device, timestamp, msecs); mutex_lock(&device->mutex); @@ -946,19 +841,7 @@ static int z180_wait(struct kgsl_device *device, return status; } -static long -z180_ioctl_cmdwindow_write(struct kgsl_device_private *dev_priv, - void *data) -{ - struct kgsl_cmdwindow_write *param = data; - - return z180_cmdwindow_write(dev_priv->device, - param->target, - param->addr, - param->data); -} - -static int +static void z180_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { @@ -971,63 +854,62 @@ z180_drawctxt_destroy(struct kgsl_device *device, device->mmu.hwpagetable = device->mmu.defaultpagetable; kgsl_setstate(device, KGSL_MMUFLAGS_PTUPDATE); } - - return 0; -} - -static long z180_ioctl(struct kgsl_device_private *dev_priv, - unsigned int cmd, void *data) -{ - int result = 0; - - switch (cmd) { - case IOCTL_KGSL_CMDWINDOW_WRITE: - result = z180_ioctl_cmdwindow_write(dev_priv, data); - break; - default: - KGSL_DRV_INFO(dev_priv->device, - "invalid ioctl code %08x\n", cmd); - result = -EINVAL; - break; - } - return result; - } static void z180_power_stats(struct kgsl_device *device, struct kgsl_power_stats *stats) { - stats->total_time = 0; - stats->busy_time = 0; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (pwr->time == 0) { + pwr->time = ktime_to_us(ktime_get()); + stats->total_time = 0; + stats->busy_time = 0; + } else { + s64 tmp; + tmp = ktime_to_us(ktime_get()); + stats->total_time = tmp - pwr->time; + stats->busy_time = tmp - pwr->time; + pwr->time = tmp; + } } -static void __devinit z180_getfunctable(struct kgsl_functable *ftbl) +static void z180_irqctrl(struct kgsl_device *device, int state) { - if (ftbl == NULL) - return; - ftbl->device_regread = z180_regread; - ftbl->device_regwrite = z180_regwrite; - ftbl->device_regread_isr = z180_regread_isr; - ftbl->device_regwrite_isr = z180_regwrite_isr; - ftbl->device_setstate = z180_setstate; - ftbl->device_idle = z180_idle; - ftbl->device_isidle = z180_isidle; - ftbl->device_suspend_context = z180_suspend_context; - ftbl->device_resume_context = z180_resume_context; - ftbl->device_start = z180_start; - ftbl->device_stop = z180_stop; - ftbl->device_getproperty = z180_getproperty; - ftbl->device_waittimestamp = z180_waittimestamp; - ftbl->device_readtimestamp = z180_readtimestamp; - ftbl->device_issueibcmds = z180_cmdstream_issueibcmds; - ftbl->device_drawctxt_create = NULL; - ftbl->device_drawctxt_destroy = z180_drawctxt_destroy; - ftbl->device_ioctl = z180_ioctl; - ftbl->device_setup_pt = z180_setup_pt; - ftbl->device_cleanup_pt = z180_cleanup_pt; - ftbl->device_power_stats = z180_power_stats, + /* Control interrupts for Z180 and the Z180 MMU */ + + if (state) { + z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 3); + z180_regwrite(device, MH_INTERRUPT_MASK, KGSL_MMU_INT_MASK); + } else { + z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 0); + z180_regwrite(device, 
MH_INTERRUPT_MASK, 0); + } } +static const struct kgsl_functable z180_functable = { + /* Mandatory functions */ + .regread = z180_regread, + .regwrite = z180_regwrite, + .idle = z180_idle, + .isidle = z180_isidle, + .suspend_context = z180_suspend_context, + .start = z180_start, + .stop = z180_stop, + .getproperty = z180_getproperty, + .waittimestamp = z180_waittimestamp, + .readtimestamp = z180_readtimestamp, + .issueibcmds = z180_cmdstream_issueibcmds, + .setup_pt = z180_setup_pt, + .cleanup_pt = z180_cleanup_pt, + .power_stats = z180_power_stats, + .irqctrl = z180_irqctrl, + /* Optional functions */ + .drawctxt_create = NULL, + .drawctxt_destroy = z180_drawctxt_destroy, + .ioctl = NULL, +}; + static struct platform_device_id z180_id_table[] = { { DEVICE_2D0_NAME, (kernel_ulong_t)&device_2d0.dev, }, { DEVICE_2D1_NAME, (kernel_ulong_t)&device_2d1.dev, }, diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h index c62398a5..28b1cc6b 100644 --- a/drivers/gpu/msm/z180.h +++ b/drivers/gpu/msm/z180.h @@ -1,34 +1,20 @@ /* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * */ #ifndef __Z180_H #define __Z180_H +#include "kgsl_device.h" + #define DEVICE_2D_NAME "kgsl-2d" #define DEVICE_2D0_NAME "kgsl-2d0" #define DEVICE_2D1_NAME "kgsl-2d1" diff --git a/drivers/gpu/msm/z180_reg.h b/drivers/gpu/msm/z180_reg.h index f5625535..5b6c0017 100644 --- a/drivers/gpu/msm/z180_reg.h +++ b/drivers/gpu/msm/z180_reg.h @@ -1,29 +1,13 @@ /* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Code Aurora Forum, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. * - * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
* */ #ifndef __Z80_REG_H @@ -48,36 +32,8 @@ #define MH_ARBITER_CONFIG__RB_CLNT_ENABLE__SHIFT 0x00000019 #define MH_ARBITER_CONFIG__PA_CLNT_ENABLE__SHIFT 0x0000001a -#define MH_MMU_CONFIG__RB_W_CLNT_BEHAVIOR__SHIFT 0x00000004 -#define MH_MMU_CONFIG__CP_W_CLNT_BEHAVIOR__SHIFT 0x00000006 -#define MH_MMU_CONFIG__CP_R0_CLNT_BEHAVIOR__SHIFT 0x00000008 -#define MH_MMU_CONFIG__CP_R1_CLNT_BEHAVIOR__SHIFT 0x0000000a -#define MH_MMU_CONFIG__CP_R2_CLNT_BEHAVIOR__SHIFT 0x0000000c -#define MH_MMU_CONFIG__CP_R3_CLNT_BEHAVIOR__SHIFT 0x0000000e -#define MH_MMU_CONFIG__CP_R4_CLNT_BEHAVIOR__SHIFT 0x00000010 -#define MH_MMU_CONFIG__VGT_R0_CLNT_BEHAVIOR__SHIFT 0x00000012 -#define MH_MMU_CONFIG__VGT_R1_CLNT_BEHAVIOR__SHIFT 0x00000014 -#define MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT 0x00000016 -#define MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT 0x00000018 - -#define ADDR_MH_ARBITER_CONFIG 0x0A40 -#define ADDR_MH_INTERRUPT_CLEAR 0x0A44 -#define ADDR_MH_INTERRUPT_MASK 0x0A42 -#define ADDR_MH_INTERRUPT_STATUS 0x0A43 -#define ADDR_MH_AXI_ERROR 0x0A45 -#define ADDR_MH_AXI_HALT_CONTROL 0x0A50 -#define ADDR_MH_CLNT_INTF_CTRL_CONFIG1 0x0A54 -#define ADDR_MH_CLNT_INTF_CTRL_CONFIG2 0x0A55 -#define ADDR_MH_MMU_CONFIG 0x0040 -#define ADDR_MH_MMU_INVALIDATE 0x0045 -#define ADDR_MH_MMU_MPU_BASE 0x0046 -#define ADDR_MH_MMU_MPU_END 0x0047 -#define ADDR_MH_MMU_PT_BASE 0x0042 -#define ADDR_MH_MMU_TRAN_ERROR 0x0044 -#define ADDR_MH_MMU_VA_RANGE 0x0041 #define ADDR_VGC_MH_READ_ADDR 0x0510 #define ADDR_VGC_MH_DATA_ADDR 0x0518 -#define ADDR_MH_MMU_PAGE_FAULT 0x0043 #define ADDR_VGC_COMMANDSTREAM 0x0000 #define ADDR_VGC_IRQENABLE 0x0438 #define ADDR_VGC_IRQSTATUS 0x0418 diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 3481842f..ce7e1547 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -12,15 +12,7 @@ obj-$(CONFIG_LKDTM) += lkdtm.o obj-$(CONFIG_TIFM_CORE) += tifm_core.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o obj-$(CONFIG_PHANTOM) += phantom.o -ifeq ($(CONFIG_ARCH_MSM7227),y) - obj-$(CONFIG_ANDROID_PMEM) += pmem_7x27.o -else - ifeq ($(CONFIG_ARCH_MSM7X30),y) - obj-$(CONFIG_ANDROID_PMEM) += pmem_7x30.o - else - obj-$(CONFIG_ANDROID_PMEM) += pmem.o - endif -endif +obj-$(CONFIG_ANDROID_PMEM) += pmem.o obj-$(CONFIG_SGI_IOC4) += ioc4.o obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o obj-$(CONFIG_KERNEL_DEBUGGER_CORE) += kernel_debugger.o diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 079918eb..a0763da9 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -3,6 +3,7 @@ * Android IPC Subsystem * * Copyright (C) 2007-2008 Google, Inc. + * Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -26,9 +27,10 @@ #include #include #include -#include +#include #include #include +#include #include #include #include @@ -42,15 +44,29 @@ static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); static HLIST_HEAD(binder_dead_nodes); -static struct proc_dir_entry *binder_proc_dir_entry_root; -static struct proc_dir_entry *binder_proc_dir_entry_proc; +static struct dentry *binder_debugfs_dir_entry_root; +static struct dentry *binder_debugfs_dir_entry_proc; static struct binder_node *binder_context_mgr_node; static uid_t binder_context_mgr_uid = -1; static int binder_last_id; static struct workqueue_struct *binder_deferred_workqueue; -static int binder_read_proc_proc(char *page, char **start, off_t off, - int count, int *eof, void *data); +#define BINDER_DEBUG_ENTRY(name) \ +static int binder_##name##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, binder_##name##_show, inode->i_private); \ +} \ +\ +static const struct file_operations binder_##name##_fops = { \ + .owner = THIS_MODULE, \ + .open = binder_##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +static int binder_proc_show(struct seq_file *m, void *unused); +BINDER_DEBUG_ENTRY(proc); /* This is only defined in include/asm-arm/sizes.h */ #ifndef SZ_1K @@ -82,9 +98,9 @@ enum { BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, BINDER_DEBUG_PRIORITY_CAP = 1U << 14, BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, + BINDER_DEBUG_TOP_ERRORS = 1U << 16, }; -static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | - BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; +static uint32_t binder_debug_mask; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); static int binder_debug_no_lock; @@ -296,6 +312,7 @@ struct binder_proc { int requested_threads_started; int ready_threads; long default_priority; + struct dentry *debugfs_entry; }; enum { @@ -622,7 +639,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, goto free_range; if (vma == NULL) { - printk(KERN_ERR "binder: %d: binder_alloc_buf failed to " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: binder_alloc_buf failed to " "map pages in userspace, no vma\n", proc->pid); goto err_no_vma; } @@ -635,7 +653,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, BUG_ON(*page); *page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (*page == NULL) { - printk(KERN_ERR "binder: %d: binder_alloc_buf failed " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: binder_alloc_buf failed " "for page at %p\n", proc->pid, page_addr); goto err_alloc_page_failed; } @@ -644,7 +663,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, page_array_ptr = page; ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); if (ret) { - printk(KERN_ERR "binder: %d: binder_alloc_buf failed " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: binder_alloc_buf failed " "to map page at %p in kernel\n", proc->pid, page_addr); goto err_map_kernel_failed; @@ -653,7 +673,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, (uintptr_t)page_addr + proc->user_buffer_offset; ret = vm_insert_page(vma, user_page_addr, page[0]); if (ret) { - printk(KERN_ERR "binder: %d: binder_alloc_buf failed " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: 
binder_alloc_buf failed " "to map page at %lx in userspace\n", proc->pid, user_page_addr); goto err_vm_insert_page_failed; @@ -702,7 +723,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, size_t size; if (proc->vma == NULL) { - printk(KERN_ERR "binder: %d: binder_alloc_buf, no vma\n", + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: binder_alloc_buf, no vma\n", proc->pid); return NULL; } @@ -740,7 +762,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, } } if (best_fit == NULL) { - printk(KERN_ERR "binder: %d: binder_alloc_buf size %zd failed, " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d: binder_alloc_buf size %zd failed, " "no address space\n", proc->pid, size); return NULL; } @@ -975,7 +998,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, node->internal_strong_refs == 0 && !(node == binder_context_mgr_node && node->has_strong_ref)) { - printk(KERN_ERR "binder: invalid inc strong " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: invalid inc strong " "node for %d\n", node->debug_id); return -EINVAL; } @@ -991,7 +1015,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, node->local_weak_refs++; if (!node->has_weak_ref && list_empty(&node->work.entry)) { if (target_list == NULL) { - printk(KERN_ERR "binder: invalid inc weak node " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: invalid inc weak node " "for %d\n", node->debug_id); return -EINVAL; } @@ -1028,7 +1053,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) if (node->proc) { rb_erase(&node->rb_node, &node->proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "binder: refless node %d deleted\n", + "binder: refless node %d deleted\n", node->debug_id); } else { hlist_del(&node->dead_node); @@ -1247,14 +1272,16 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "binder: send failed reply for " "transaction %d to %d:%d\n", - t->debug_id, target_thread->proc->pid, + t->debug_id, + target_thread->proc->pid, target_thread->pid); binder_pop_transaction(target_thread, t); target_thread->return_error = error_code; wake_up_interruptible(&target_thread->wait); } else { - printk(KERN_ERR "binder: reply failed, target " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: reply failed, target " "thread, %d:%d, has error code %d " "already\n", target_thread->proc->pid, target_thread->pid, @@ -1292,14 +1319,15 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, - "binder: %d buffer release %d, size %zd-%zd, failed at %p\n", - proc->pid, buffer->debug_id, + "binder: %d buffer release %d, size %zd-%zd, failed at" + " %p\n", proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, failed_at); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); - offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, sizeof(void *))); + offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, + sizeof(void *))); if (failed_at) off_end = failed_at; else @@ -1309,7 +1337,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (*offp > buffer->data_size - sizeof(*fp) || buffer->data_size < sizeof(*fp) || !IS_ALIGNED(*offp, sizeof(void *))) { - printk(KERN_ERR "binder: transaction release %d bad" + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: transaction release %d bad" "offset %zd, 
size %zd\n", debug_id, *offp, buffer->data_size); continue; @@ -1318,29 +1347,35 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, switch (fp->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_node *node = binder_get_node(proc, fp->binder); + struct binder_node *node = binder_get_node(proc, + fp->binder); if (node == NULL) { - printk(KERN_ERR "binder: transaction release %d" + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: transaction release %d" " bad node %p\n", debug_id, fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%p\n", node->debug_id, node->ptr); - binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0); + binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, + 0); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref = binder_get_ref(proc, + fp->handle); if (ref == NULL) { - printk(KERN_ERR "binder: transaction release %d" + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: transaction release %d" " bad handle %ld\n", debug_id, fp->handle); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, ref->node->debug_id); + ref->debug_id, ref->desc, + ref->node->debug_id); binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE); } break; @@ -1352,7 +1387,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, break; default: - printk(KERN_ERR "binder: transaction release %d bad " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: transaction release %d bad " "object type %lx\n", debug_id, fp->type); break; } @@ -1503,21 +1539,19 @@ static void binder_transaction(struct binder_proc *proc, if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, "binder: %d:%d BC_REPLY %d -> %d:%d, " - "data %p-%p size %zd-%zd async: %d\n", + "data %p-%p size %zd-%zd\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_thread->pid, tr->data.ptr.buffer, tr->data.ptr.offsets, - tr->data_size, tr->offsets_size, - (tr->flags & TF_ONE_WAY) ? 1 : 0); + tr->data_size, tr->offsets_size); else binder_debug(BINDER_DEBUG_TRANSACTION, "binder: %d:%d BC_TRANSACTION %d -> " - "%d - node %d, data %p-%p size %zd-%zd async: %d\n", + "%d - node %d, data %p-%p size %zd-%zd\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_node->debug_id, tr->data.ptr.buffer, tr->data.ptr.offsets, - tr->data_size, tr->offsets_size, - (tr->flags & TF_ONE_WAY) ? 
1 : 0); + tr->data_size, tr->offsets_size); if (!reply && !(tr->flags & TF_ONE_WAY)) t->from = thread; @@ -1580,15 +1614,19 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct binder_ref *ref; - struct binder_node *node = binder_get_node(proc, fp->binder); + struct binder_node *node = binder_get_node(proc, + fp->binder); if (node == NULL) { - node = binder_new_node(proc, fp->binder, fp->cookie); + node = binder_new_node(proc, fp->binder, + fp->cookie); if (node == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_new_node_failed; } - node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->min_priority = fp->flags & + FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(fp->flags & + FLAT_BINDER_FLAG_ACCEPTS_FDS); } if (fp->cookie != node->cookie) { binder_user_error("binder: %d:%d sending u%p " @@ -1618,7 +1656,8 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref = binder_get_ref(proc, + fp->handle); if (ref == NULL) { binder_user_error("binder: %d:%d got " "transaction with invalid " @@ -1634,24 +1673,31 @@ static void binder_transaction(struct binder_proc *proc, fp->type = BINDER_TYPE_WEAK_BINDER; fp->binder = ref->node->ptr; fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); + binder_inc_node(ref->node, fp->type == + BINDER_TYPE_BINDER, 0, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> node %d u%p\n", - ref->debug_id, ref->desc, ref->node->debug_id, - ref->node->ptr); + " ref %d desc %d -> node %d u%p\n", + ref->debug_id, ref->desc, + ref->node->debug_id, + ref->node->ptr); } else { struct binder_ref *new_ref; - new_ref = binder_get_ref_for_node(target_proc, ref->node); + new_ref = binder_get_ref_for_node(target_proc, + ref->node); if (new_ref == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } fp->handle = new_ref->desc; - binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); + binder_inc_ref(new_ref, fp->type == + BINDER_TYPE_HANDLE, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> ref %d desc %d (node %d)\n", - ref->debug_id, ref->desc, new_ref->debug_id, - new_ref->desc, ref->node->debug_id); + " ref %d desc %d -> ref %d" + " desc %d (node %d)\n", + ref->debug_id, ref->desc, + new_ref->debug_id, + new_ref->desc, + ref->node->debug_id); } } break; @@ -1661,13 +1707,19 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { - binder_user_error("binder: %d:%d got reply with fd, %ld, but target does not allow fds\n", - proc->pid, thread->pid, fp->handle); + binder_user_error("binder: %d:%d got" + " reply with fd, %ld, but" + " target does not allow fds\n", + proc->pid, thread->pid, + fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; } } else if (!target_node->accept_fds) { - binder_user_error("binder: %d:%d got transaction with fd, %ld, but target does not allow fds\n", + binder_user_error( + "binder: %d:%d got transaction" + " with fd, %ld, but target does" + " not allow fds\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; @@ -1675,12 +1727,15 @@ static void binder_transaction(struct binder_proc *proc, 
file = fget(fp->handle); if (file == NULL) { - binder_user_error("binder: %d:%d got transaction with invalid fd, %ld\n", + binder_user_error( + "binder: %d:%d got transaction" + " with invalid fd, %ld\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fget_failed; } - target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); + target_fd = task_get_unused_fd_flags(target_proc, + O_CLOEXEC); if (target_fd < 0) { fput(file); return_error = BR_FAILED_REPLY; @@ -1688,7 +1743,8 @@ static void binder_transaction(struct binder_proc *proc, } task_fd_install(target_proc, target_fd, file); binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %ld -> %d\n", fp->handle, target_fd); + " fd %ld -> %d\n", fp->handle, + target_fd); /* TODO: fput? */ fp->handle = target_fd; } break; @@ -1837,9 +1893,11 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_USER_REFS, - "binder: %d:%d %s ref %d desc %d s %d w %d for node %d\n", - proc->pid, thread->pid, debug_string, ref->debug_id, - ref->desc, ref->strong, ref->weak, ref->node->debug_id); + "binder: %d:%d %s ref %d desc %d s %d w %d" + " for node %d\n", proc->pid, thread->pid, + debug_string, ref->debug_id, ref->desc, + ref->strong, ref->weak, + ref->node->debug_id); break; } case BC_INCREFS_DONE: @@ -1900,15 +1958,19 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, binder_debug(BINDER_DEBUG_USER_REFS, "binder: %d:%d %s node %d ls %d lw %d\n", proc->pid, thread->pid, - cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", - node->debug_id, node->local_strong_refs, node->local_weak_refs); + cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" + : "BC_ACQUIRE_DONE", + node->debug_id, node->local_strong_refs, + node->local_weak_refs); break; } case BC_ATTEMPT_ACQUIRE: - printk(KERN_ERR "binder: BC_ATTEMPT_ACQUIRE not supported\n"); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BC_ATTEMPT_ACQUIRE not supported\n"); return -EINVAL; case BC_ACQUIRE_RESULT: - printk(KERN_ERR "binder: BC_ACQUIRE_RESULT not supported\n"); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BC_ACQUIRE_RESULT not supported\n"); return -EINVAL; case BC_FREE_BUFFER: { @@ -1934,9 +1996,11 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, - "binder: %d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n", - proc->pid, thread->pid, data_ptr, buffer->debug_id, - buffer->transaction ? "active" : "finished"); + "binder: %d:%d BC_FREE_BUFFER u%p found" + " buffer %d for %s transaction\n", + proc->pid, thread->pid, data_ptr, + buffer->debug_id, buffer->transaction ? + "active" : "finished"); if (buffer->transaction) { buffer->transaction->buffer = NULL; @@ -2033,13 +2097,15 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, } binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, - "binder: %d:%d %s %p ref %d desc %d s %d w %d for node %d\n", + "binder: %d:%d %s %p ref %d desc %d s %d" + " w %d for node %d\n", proc->pid, thread->pid, cmd == BC_REQUEST_DEATH_NOTIFICATION ? 
"BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", cookie, ref->debug_id, ref->desc, - ref->strong, ref->weak, ref->node->debug_id); + ref->strong, ref->weak, + ref->node->debug_id); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { @@ -2053,10 +2119,12 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, death = kzalloc(sizeof(*death), GFP_KERNEL); if (death == NULL) { thread->return_error = BR_ERROR; - binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, - "binder: %d:%d " - "BC_REQUEST_DEATH_NOTIFICATION failed\n", - proc->pid, thread->pid); + binder_debug( + BINDER_DEBUG_FAILED_TRANSACTION, + "binder: %d:%d " + "BC_REQUEST_DEATH_NOTIFICATION" + " failed\n", + proc->pid, thread->pid); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -2145,7 +2213,8 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, } break; default: - printk(KERN_ERR "binder: %d:%d unknown command %d\n", + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d:%d unknown command %d\n", proc->pid, thread->pid, cmd); return -EINVAL; } @@ -2615,9 +2684,11 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ + /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_ioctl: %d:%d %x %lx\n", + proc->pid, current->pid, cmd, arg);*/ - ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); + ret = wait_event_interruptible(binder_user_error_wait, + binder_stop_on_user_error < 2); if (ret) return ret; @@ -2674,20 +2745,23 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; } case BINDER_SET_MAX_THREADS: - if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { + if (copy_from_user(&proc->max_threads, ubuf, + sizeof(proc->max_threads))) { ret = -EINVAL; goto err; } break; case BINDER_SET_CONTEXT_MGR: if (binder_context_mgr_node != NULL) { - printk(KERN_ERR "binder: BINDER_SET_CONTEXT_MGR already set\n"); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto err; } if (binder_context_mgr_uid != -1) { if (binder_context_mgr_uid != current->cred->euid) { - printk(KERN_ERR "binder: BINDER_SET_" + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BINDER_SET_" "CONTEXT_MGR bad uid %d != %d\n", current->cred->euid, binder_context_mgr_uid); @@ -2733,7 +2807,9 @@ err: mutex_unlock(&binder_lock); wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) - printk(KERN_INFO "binder: %d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: %d:%d ioctl %x %lx returned %d\n", + proc->pid, current->pid, cmd, arg, ret); return ret; } @@ -2807,7 +2883,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { - printk(KERN_INFO "binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder_mmap: %d %lx-%lx maps %p bad alignment\n", + proc->pid, vma->vm_start, vma->vm_end, proc->buffer); vma->vm_start += PAGE_SIZE; } } @@ -2838,7 +2916,8 @@ static int binder_mmap(struct file *filp, struct 
vm_area_struct *vma) proc->files = get_files_struct(current); proc->vma = vma; - /*printk(KERN_INFO "binder_mmap: %d %lx-%lx maps %p\n", + /*binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder_mmap: %d %lx-%lx maps %p\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/ return 0; @@ -2851,7 +2930,8 @@ err_alloc_pages_failed: err_get_vm_area_failed: err_already_mapped: err_bad_arg: - printk(KERN_ERR "binder_mmap: %d %lx-%lx %s failed %d\n", + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder_mmap: %d %lx-%lx %s failed %d\n", proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); return ret; } @@ -2879,13 +2959,11 @@ static int binder_open(struct inode *nodp, struct file *filp) filp->private_data = proc; mutex_unlock(&binder_lock); - if (binder_proc_dir_entry_proc) { + if (binder_debugfs_dir_entry_proc) { char strbuf[11]; snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); - remove_proc_entry(strbuf, binder_proc_dir_entry_proc); - create_proc_read_entry(strbuf, S_IRUGO, - binder_proc_dir_entry_proc, - binder_read_proc_proc, proc); + proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO, + binder_debugfs_dir_entry_proc, proc, &binder_proc_fops); } return 0; @@ -2922,12 +3000,7 @@ static void binder_deferred_flush(struct binder_proc *proc) static int binder_release(struct inode *nodp, struct file *filp) { struct binder_proc *proc = filp->private_data; - if (binder_proc_dir_entry_proc) { - char strbuf[11]; - snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); - remove_proc_entry(strbuf, binder_proc_dir_entry_proc); - } - + debugfs_remove(proc->debugfs_entry); binder_defer_work(proc, BINDER_DEFERRED_RELEASE); return 0; @@ -3013,7 +3086,8 @@ static void binder_deferred_release(struct binder_proc *proc) if (t) { t->buffer = NULL; buffer->transaction = NULL; - printk(KERN_ERR "binder: release proc %d, " + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: release proc %d, " "transaction %d, not freed\n", proc->pid, t->debug_id); /*BUG();*/ @@ -3111,49 +3185,41 @@ binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer) mutex_unlock(&binder_deferred_lock); } -static char *print_binder_transaction(char *buf, char *end, const char *prefix, - struct binder_transaction *t) +static void print_binder_transaction(struct seq_file *m, const char *prefix, + struct binder_transaction *t) { - buf += snprintf(buf, end - buf, - "%s %d: %p from %d:%d to %d:%d code %x " - "flags %x pri %ld r%d", - prefix, t->debug_id, t, - t->from ? t->from->proc->pid : 0, - t->from ? t->from->pid : 0, - t->to_proc ? t->to_proc->pid : 0, - t->to_thread ? t->to_thread->pid : 0, - t->code, t->flags, t->priority, t->need_reply); - if (buf >= end) - return buf; + seq_printf(m, + "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", + prefix, t->debug_id, t, + t->from ? t->from->proc->pid : 0, + t->from ? t->from->pid : 0, + t->to_proc ? t->to_proc->pid : 0, + t->to_thread ? 
t->to_thread->pid : 0, + t->code, t->flags, t->priority, t->need_reply); if (t->buffer == NULL) { - buf += snprintf(buf, end - buf, " buffer free\n"); - return buf; + seq_puts(m, " buffer free\n"); + return; } - if (t->buffer->target_node) { - buf += snprintf(buf, end - buf, " node %d", - t->buffer->target_node->debug_id); - if (buf >= end) - return buf; - } - buf += snprintf(buf, end - buf, " size %zd:%zd data %p\n", - t->buffer->data_size, t->buffer->offsets_size, - t->buffer->data); - return buf; + if (t->buffer->target_node) + seq_printf(m, " node %d", + t->buffer->target_node->debug_id); + seq_printf(m, " size %zd:%zd data %p\n", + t->buffer->data_size, t->buffer->offsets_size, + t->buffer->data); } -static char *print_binder_buffer(char *buf, char *end, const char *prefix, - struct binder_buffer *buffer) +static void print_binder_buffer(struct seq_file *m, const char *prefix, + struct binder_buffer *buffer) { - buf += snprintf(buf, end - buf, "%s %d: %p size %zd:%zd %s\n", - prefix, buffer->debug_id, buffer->data, - buffer->data_size, buffer->offsets_size, - buffer->transaction ? "active" : "delivered"); - return buf; + seq_printf(m, "%s %d: %p size %zd:%zd %s\n", + prefix, buffer->debug_id, buffer->data, + buffer->data_size, buffer->offsets_size, + buffer->transaction ? "active" : "delivered"); } -static char *print_binder_work(char *buf, char *end, const char *prefix, - const char *transaction_prefix, - struct binder_work *w) +static void print_binder_work(struct seq_file *m, const char *prefix, + const char *transaction_prefix, + struct binder_work *w) { struct binder_node *node; struct binder_transaction *t; @@ -3161,79 +3227,65 @@ static char *print_binder_work(char *buf, char *end, const char *prefix, switch (w->type) { case BINDER_WORK_TRANSACTION: t = container_of(w, struct binder_transaction, work); - buf = print_binder_transaction(buf, end, transaction_prefix, t); + print_binder_transaction(m, transaction_prefix, t); break; case BINDER_WORK_TRANSACTION_COMPLETE: - buf += snprintf(buf, end - buf, - "%stransaction complete\n", prefix); + seq_printf(m, "%stransaction complete\n", prefix); break; case BINDER_WORK_NODE: node = container_of(w, struct binder_node, work); - buf += snprintf(buf, end - buf, "%snode work %d: u%p c%p\n", - prefix, node->debug_id, node->ptr, - node->cookie); + seq_printf(m, "%snode work %d: u%p c%p\n", + prefix, node->debug_id, node->ptr, node->cookie); break; case BINDER_WORK_DEAD_BINDER: - buf += snprintf(buf, end - buf, "%shas dead binder\n", prefix); + seq_printf(m, "%shas dead binder\n", prefix); break; case BINDER_WORK_DEAD_BINDER_AND_CLEAR: - buf += snprintf(buf, end - buf, - "%shas cleared dead binder\n", prefix); + seq_printf(m, "%shas cleared dead binder\n", prefix); break; case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: - buf += snprintf(buf, end - buf, - "%shas cleared death notification\n", prefix); + seq_printf(m, "%shas cleared death notification\n", prefix); break; default: - buf += snprintf(buf, end - buf, "%sunknown work: type %d\n", - prefix, w->type); + seq_printf(m, "%sunknown work: type %d\n", prefix, w->type); break; } - return buf; } -static char *print_binder_thread(char *buf, char *end, - struct binder_thread *thread, - int print_always) +static void print_binder_thread(struct seq_file *m, + struct binder_thread *thread, + int print_always) { struct binder_transaction *t; struct binder_work *w; - char *start_buf = buf; - char *header_buf; + size_t start_pos = m->count; + size_t header_pos; - buf += snprintf(buf, end - buf, " thread 
%d: l %02x\n", - thread->pid, thread->looper); - header_buf = buf; + seq_printf(m, " thread %d: l %02x\n", thread->pid, thread->looper); + header_pos = m->count; t = thread->transaction_stack; while (t) { - if (buf >= end) - break; if (t->from == thread) { - buf = print_binder_transaction(buf, end, - " outgoing transaction", t); + print_binder_transaction(m, + " outgoing transaction", t); t = t->from_parent; } else if (t->to_thread == thread) { - buf = print_binder_transaction(buf, end, - " incoming transaction", t); + print_binder_transaction(m, + " incoming transaction", t); t = t->to_parent; } else { - buf = print_binder_transaction(buf, end, - " bad transaction", t); + print_binder_transaction(m, " bad transaction", t); t = NULL; } } list_for_each_entry(w, &thread->todo, entry) { - if (buf >= end) - break; - buf = print_binder_work(buf, end, " ", - " pending transaction", w); + print_binder_work(m, " ", " pending transaction", w); } - if (!print_always && buf == header_buf) - buf = start_buf; - return buf; + if (!print_always && m->count == header_pos) + m->count = start_pos; } -static char *print_binder_node(char *buf, char *end, struct binder_node *node) +static void print_binder_node(struct seq_file *m, struct binder_node *node) { struct binder_ref *ref; struct hlist_node *pos; @@ -3244,100 +3296,67 @@ static char *print_binder_node(char *buf, char *end, struct binder_node *node) hlist_for_each_entry(ref, pos, &node->refs, node_entry) count++; - buf += snprintf(buf, end - buf, - " node %d: u%p c%p hs %d hw %d ls %d lw %d " - "is %d iw %d", - node->debug_id, node->ptr, node->cookie, - node->has_strong_ref, node->has_weak_ref, - node->local_strong_refs, node->local_weak_refs, - node->internal_strong_refs, count); - if (buf >= end) - return buf; + seq_printf(m, " node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d", + node->debug_id, node->ptr, node->cookie, + node->has_strong_ref, node->has_weak_ref, + node->local_strong_refs, node->local_weak_refs, + node->internal_strong_refs, count); if (count) { - buf += snprintf(buf, end - buf, " proc"); - if (buf >= end) - return buf; - hlist_for_each_entry(ref, pos, &node->refs, node_entry) { - buf += snprintf(buf, end - buf, " %d", ref->proc->pid); - if (buf >= end) - return buf; - } + seq_puts(m, " proc"); + hlist_for_each_entry(ref, pos, &node->refs, node_entry) + seq_printf(m, " %d", ref->proc->pid); } - buf += snprintf(buf, end - buf, "\n"); - list_for_each_entry(w, &node->async_todo, entry) { - if (buf >= end) - break; - buf = print_binder_work(buf, end, " ", - " pending async transaction", w); - } - return buf; + seq_puts(m, "\n"); + list_for_each_entry(w, &node->async_todo, entry) + print_binder_work(m, " ", + " pending async transaction", w); } -static char *print_binder_ref(char *buf, char *end, struct binder_ref *ref) +static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) { - buf += snprintf(buf, end - buf, - " ref %d: desc %d %snode %d s %d w %d d %p\n", - ref->debug_id, ref->desc, - ref->node->proc ? "" : "dead ", ref->node->debug_id, - ref->strong, ref->weak, ref->death); - return buf; + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", + ref->debug_id, ref->desc, ref->node->proc ? 
"" : "dead ", + ref->node->debug_id, ref->strong, ref->weak, ref->death); } -static char *print_binder_proc(char *buf, char *end, - struct binder_proc *proc, int print_all) +static void print_binder_proc(struct seq_file *m, + struct binder_proc *proc, int print_all) { struct binder_work *w; struct rb_node *n; - char *start_buf = buf; - char *header_buf; + size_t start_pos = m->count; + size_t header_pos; - buf += snprintf(buf, end - buf, "proc %d\n", proc->pid); - header_buf = buf; + seq_printf(m, "proc %d\n", proc->pid); + header_pos = m->count; - for (n = rb_first(&proc->threads); - n != NULL && buf < end; - n = rb_next(n)) - buf = print_binder_thread(buf, end, - rb_entry(n, struct binder_thread, - rb_node), print_all); - for (n = rb_first(&proc->nodes); - n != NULL && buf < end; - n = rb_next(n)) { + for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) + print_binder_thread(m, rb_entry(n, struct binder_thread, + rb_node), print_all); + for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); if (print_all || node->has_async_transaction) - buf = print_binder_node(buf, end, node); + print_binder_node(m, node); } if (print_all) { for (n = rb_first(&proc->refs_by_desc); - n != NULL && buf < end; + n != NULL; n = rb_next(n)) - buf = print_binder_ref(buf, end, - rb_entry(n, struct binder_ref, - rb_node_desc)); - } - for (n = rb_first(&proc->allocated_buffers); - n != NULL && buf < end; - n = rb_next(n)) - buf = print_binder_buffer(buf, end, " buffer", - rb_entry(n, struct binder_buffer, - rb_node)); - list_for_each_entry(w, &proc->todo, entry) { - if (buf >= end) - break; - buf = print_binder_work(buf, end, " ", - " pending transaction", w); + print_binder_ref(m, rb_entry(n, struct binder_ref, + rb_node_desc)); } + for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) + print_binder_buffer(m, " buffer", + rb_entry(n, struct binder_buffer, rb_node)); + list_for_each_entry(w, &proc->todo, entry) + print_binder_work(m, " ", " pending transaction", w); list_for_each_entry(w, &proc->delivered_death, entry) { - if (buf >= end) - break; - buf += snprintf(buf, end - buf, - " has delivered dead binder\n"); + seq_puts(m, " has delivered dead binder\n"); break; } - if (!print_all && buf == header_buf) - buf = start_buf; - return buf; + if (!print_all && m->count == header_pos) + m->count = start_pos; } static const char *binder_return_strings[] = { @@ -3391,79 +3410,61 @@ static const char *binder_objstat_strings[] = { "transaction_complete" }; -static char *print_binder_stats(char *buf, char *end, const char *prefix, - struct binder_stats *stats) +static void print_binder_stats(struct seq_file *m, const char *prefix, + struct binder_stats *stats) { int i; BUILD_BUG_ON(ARRAY_SIZE(stats->bc) != - ARRAY_SIZE(binder_command_strings)); + ARRAY_SIZE(binder_command_strings)); for (i = 0; i < ARRAY_SIZE(stats->bc); i++) { if (stats->bc[i]) - buf += snprintf(buf, end - buf, "%s%s: %d\n", prefix, - binder_command_strings[i], - stats->bc[i]); - if (buf >= end) - return buf; + seq_printf(m, "%s%s: %d\n", prefix, + binder_command_strings[i], stats->bc[i]); } BUILD_BUG_ON(ARRAY_SIZE(stats->br) != - ARRAY_SIZE(binder_return_strings)); + ARRAY_SIZE(binder_return_strings)); for (i = 0; i < ARRAY_SIZE(stats->br); i++) { if (stats->br[i]) - buf += snprintf(buf, end - buf, "%s%s: %d\n", prefix, - binder_return_strings[i], stats->br[i]); - if (buf >= end) - return buf; + seq_printf(m, "%s%s: %d\n", prefix, + 
binder_return_strings[i], stats->br[i]); } BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != - ARRAY_SIZE(binder_objstat_strings)); + ARRAY_SIZE(binder_objstat_strings)); BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) != - ARRAY_SIZE(stats->obj_deleted)); + ARRAY_SIZE(stats->obj_deleted)); for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) { if (stats->obj_created[i] || stats->obj_deleted[i]) - buf += snprintf(buf, end - buf, - "%s%s: active %d total %d\n", prefix, - binder_objstat_strings[i], - stats->obj_created[i] - - stats->obj_deleted[i], - stats->obj_created[i]); - if (buf >= end) - return buf; + seq_printf(m, "%s%s: active %d total %d\n", prefix, + binder_objstat_strings[i], + stats->obj_created[i] - stats->obj_deleted[i], + stats->obj_created[i]); } - return buf; } -static char *print_binder_proc_stats(char *buf, char *end, - struct binder_proc *proc) +static void print_binder_proc_stats(struct seq_file *m, + struct binder_proc *proc) { struct binder_work *w; struct rb_node *n; int count, strong, weak; - buf += snprintf(buf, end - buf, "proc %d\n", proc->pid); - if (buf >= end) - return buf; + seq_printf(m, "proc %d\n", proc->pid); count = 0; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) count++; - buf += snprintf(buf, end - buf, " threads: %d\n", count); - if (buf >= end) - return buf; - buf += snprintf(buf, end - buf, " requested threads: %d+%d/%d\n" + seq_printf(m, " threads: %d\n", count); + seq_printf(m, " requested threads: %d+%d/%d\n" " ready threads %d\n" " free async space %zd\n", proc->requested_threads, proc->requested_threads_started, proc->max_threads, proc->ready_threads, proc->free_async_space); - if (buf >= end) - return buf; count = 0; for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) count++; - buf += snprintf(buf, end - buf, " nodes: %d\n", count); - if (buf >= end) - return buf; + seq_printf(m, " nodes: %d\n", count); count = 0; strong = 0; weak = 0; @@ -3474,17 +3475,12 @@ static char *print_binder_proc_stats(char *buf, char *end, strong += ref->strong; weak += ref->weak; } - buf += snprintf(buf, end - buf, " refs: %d s %d w %d\n", - count, strong, weak); - if (buf >= end) - return buf; + seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak); count = 0; for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n)) count++; - buf += snprintf(buf, end - buf, " buffers: %d\n", count); - if (buf >= end) - return buf; + seq_printf(m, " buffers: %d\n", count); count = 0; list_for_each_entry(w, &proc->todo, entry) { @@ -3496,222 +3492,110 @@ static char *print_binder_proc_stats(char *buf, char *end, break; } } - buf += snprintf(buf, end - buf, " pending transactions: %d\n", count); - if (buf >= end) - return buf; + seq_printf(m, " pending transactions: %d\n", count); - buf = print_binder_stats(buf, end, " ", &proc->stats); - - return buf; + print_binder_stats(m, " ", &proc->stats); } -static int binder_read_proc_state(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct hlist_node *pos; struct binder_node *node; - int len = 0; - char *buf = page; - char *end = page + PAGE_SIZE; int do_lock = !binder_debug_no_lock; - if (off) - return 0; - if (do_lock) mutex_lock(&binder_lock); - buf += snprintf(buf, end - buf, "binder state:\n"); + seq_puts(m, "binder state:\n"); if (!hlist_empty(&binder_dead_nodes)) - buf += snprintf(buf, end - buf, "dead nodes:\n"); - hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node) 
{ - if (buf >= end) - break; - buf = print_binder_node(buf, end, node); - } + seq_puts(m, "dead nodes:\n"); + hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node) + print_binder_node(m, node); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) { - if (buf >= end) - break; - buf = print_binder_proc(buf, end, proc, 1); - } + hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + print_binder_proc(m, proc, 1); if (do_lock) mutex_unlock(&binder_lock); - if (buf > page + PAGE_SIZE) - buf = page + PAGE_SIZE; - - *start = page + off; - - len = buf - page; - if (len > off) - len -= off; - else - len = 0; - - return len < count ? len : count; + return 0; } -static int binder_read_proc_stats(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct hlist_node *pos; - int len = 0; - char *p = page; int do_lock = !binder_debug_no_lock; - if (off) - return 0; - if (do_lock) mutex_lock(&binder_lock); - p += snprintf(p, PAGE_SIZE, "binder stats:\n"); + seq_puts(m, "binder stats:\n"); - p = print_binder_stats(p, page + PAGE_SIZE, "", &binder_stats); + print_binder_stats(m, "", &binder_stats); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) { - if (p >= page + PAGE_SIZE) - break; - p = print_binder_proc_stats(p, page + PAGE_SIZE, proc); - } + hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + print_binder_proc_stats(m, proc); if (do_lock) mutex_unlock(&binder_lock); - if (p > page + PAGE_SIZE) - p = page + PAGE_SIZE; - - *start = page + off; - - len = p - page; - if (len > off) - len -= off; - else - len = 0; - - return len < count ? len : count; + return 0; } -static int binder_read_proc_transactions(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct hlist_node *pos; - int len = 0; - char *buf = page; - char *end = page + PAGE_SIZE; int do_lock = !binder_debug_no_lock; - if (off) - return 0; - if (do_lock) mutex_lock(&binder_lock); - buf += snprintf(buf, end - buf, "binder transactions:\n"); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) { - if (buf >= end) - break; - buf = print_binder_proc(buf, end, proc, 0); - } + seq_puts(m, "binder transactions:\n"); + hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + print_binder_proc(m, proc, 0); if (do_lock) mutex_unlock(&binder_lock); - if (buf > page + PAGE_SIZE) - buf = page + PAGE_SIZE; - - *start = page + off; - - len = buf - page; - if (len > off) - len -= off; - else - len = 0; - - return len < count ? len : count; + return 0; } -static int binder_read_proc_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int binder_proc_show(struct seq_file *m, void *unused) { - struct binder_proc *proc = data; - int len = 0; - char *p = page; + struct binder_proc *proc = m->private; int do_lock = !binder_debug_no_lock; - if (off) - return 0; - if (do_lock) mutex_lock(&binder_lock); - p += snprintf(p, PAGE_SIZE, "binder proc state:\n"); - p = print_binder_proc(p, page + PAGE_SIZE, proc, 1); + seq_puts(m, "binder proc state:\n"); + print_binder_proc(m, proc, 1); if (do_lock) mutex_unlock(&binder_lock); - - if (p > page + PAGE_SIZE) - p = page + PAGE_SIZE; - *start = page + off; - - len = p - page; - if (len > off) - len -= off; - else - len = 0; - - return len < count ? 
len : count; + return 0; } -static char *print_binder_transaction_log_entry(char *buf, char *end, +static void print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { - buf += snprintf(buf, end - buf, - "%d: %s from %d:%d to %d:%d node %d handle %d " - "size %d:%d\n", - e->debug_id, (e->call_type == 2) ? "reply" : - ((e->call_type == 1) ? "async" : "call "), e->from_proc, - e->from_thread, e->to_proc, e->to_thread, e->to_node, - e->target_handle, e->data_size, e->offsets_size); - return buf; + seq_printf(m, + "%d: %s from %d:%d to %d:%d node %d handle %d size %d:%d\n", + e->debug_id, (e->call_type == 2) ? "reply" : + ((e->call_type == 1) ? "async" : "call "), e->from_proc, + e->from_thread, e->to_proc, e->to_thread, e->to_node, + e->target_handle, e->data_size, e->offsets_size); } -static int binder_read_proc_transaction_log( - char *page, char **start, off_t off, int count, int *eof, void *data) +static int binder_transaction_log_show(struct seq_file *m, void *unused) { - struct binder_transaction_log *log = data; - int len = 0; + struct binder_transaction_log *log = m->private; int i; - char *buf = page; - char *end = page + PAGE_SIZE; - - if (off) - return 0; if (log->full) { - for (i = log->next; i < ARRAY_SIZE(log->entry); i++) { - if (buf >= end) - break; - buf = print_binder_transaction_log_entry(buf, end, - &log->entry[i]); - } + for (i = log->next; i < ARRAY_SIZE(log->entry); i++) + print_binder_transaction_log_entry(m, &log->entry[i]); } - for (i = 0; i < log->next; i++) { - if (buf >= end) - break; - buf = print_binder_transaction_log_entry(buf, end, - &log->entry[i]); - } - - *start = page + off; - - len = buf - page; - if (len > off) - len -= off; - else - len = 0; - - return len < count ? len : count; + for (i = 0; i < log->next; i++) + print_binder_transaction_log_entry(m, &log->entry[i]); + return 0; } static const struct file_operations binder_fops = { @@ -3730,6 +3614,11 @@ static struct miscdevice binder_miscdev = { .fops = &binder_fops }; +BINDER_DEBUG_ENTRY(state); +BINDER_DEBUG_ENTRY(stats); +BINDER_DEBUG_ENTRY(transactions); +BINDER_DEBUG_ENTRY(transaction_log); + static int __init binder_init(void) { int ret; @@ -3738,37 +3627,37 @@ static int __init binder_init(void) if (!binder_deferred_workqueue) return -ENOMEM; - binder_proc_dir_entry_root = proc_mkdir("binder", NULL); - if (binder_proc_dir_entry_root) - binder_proc_dir_entry_proc = proc_mkdir("proc", - binder_proc_dir_entry_root); + binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); + if (binder_debugfs_dir_entry_root) + binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", + binder_debugfs_dir_entry_root); ret = misc_register(&binder_miscdev); - if (binder_proc_dir_entry_root) { - create_proc_read_entry("state", - S_IRUGO, - binder_proc_dir_entry_root, - binder_read_proc_state, - NULL); - create_proc_read_entry("stats", - S_IRUGO, - binder_proc_dir_entry_root, - binder_read_proc_stats, - NULL); - create_proc_read_entry("transactions", - S_IRUGO, - binder_proc_dir_entry_root, - binder_read_proc_transactions, - NULL); - create_proc_read_entry("transaction_log", - S_IRUGO, - binder_proc_dir_entry_root, - binder_read_proc_transaction_log, - &binder_transaction_log); - create_proc_read_entry("failed_transaction_log", - S_IRUGO, - binder_proc_dir_entry_root, - binder_read_proc_transaction_log, - &binder_transaction_log_failed); + if (binder_debugfs_dir_entry_root) { + debugfs_create_file("state", + S_IRUGO, + binder_debugfs_dir_entry_root, + NULL, + 
&binder_state_fops); + debugfs_create_file("stats", + S_IRUGO, + binder_debugfs_dir_entry_root, + NULL, + &binder_stats_fops); + debugfs_create_file("transactions", + S_IRUGO, + binder_debugfs_dir_entry_root, + NULL, + &binder_transactions_fops); + debugfs_create_file("transaction_log", + S_IRUGO, + binder_debugfs_dir_entry_root, + &binder_transaction_log, + &binder_transaction_log_fops); + debugfs_create_file("failed_transaction_log", + S_IRUGO, + binder_debugfs_dir_entry_root, + &binder_transaction_log_failed, + &binder_transaction_log_fops); } return ret; } diff --git a/drivers/staging/android/ram_console.c b/drivers/staging/android/ram_console.c index 7adbdc68..55bf2dda 100644 --- a/drivers/staging/android/ram_console.c +++ b/drivers/staging/android/ram_console.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #if defined(CONFIG_VERY_EARLY_CONSOLE) #include @@ -395,7 +395,7 @@ static ssize_t ram_console_read_old(struct file *file, char __user *buf, return count; } -static const struct file_operations ram_console_file_ops = { +static struct file_operations ram_console_file_ops = { .owner = THIS_MODULE, .read = ram_console_read_old, }; diff --git a/include/linux/memory_alloc.h b/include/linux/memory_alloc.h index ec005a1c..a49bc37f 100644 --- a/include/linux/memory_alloc.h +++ b/include/linux/memory_alloc.h @@ -16,6 +16,7 @@ #include #include #include +#include struct mem_pool { struct mutex pool_mutex; @@ -56,4 +57,3 @@ unsigned long memory_pool_node_len(void *vaddr); int memory_pool_init(void); #endif /* _LINUX_MEMALLOC_H */ - diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index ceada785..56e6cc6b 100644 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -35,7 +35,7 @@ #define _MSM_KGSL_H #define KGSL_VERSION_MAJOR 3 -#define KGSL_VERSION_MINOR 7 +#define KGSL_VERSION_MINOR 8 /*context flags */ #define KGSL_CONTEXT_SAVE_GMEM 1 @@ -60,6 +60,9 @@ #define KGSL_MAX_PWRLEVELS 5 +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + /* device id */ enum kgsl_deviceid { KGSL_DEVICE_3D0 = 0x00000000, @@ -170,7 +173,6 @@ struct kgsl_device_pwr_data { int (*set_grp_async)(void); unsigned int idle_timeout; unsigned int nap_allowed; - unsigned int idle_pass; }; struct kgsl_clk_data { @@ -183,6 +185,8 @@ struct kgsl_device_platform_data { struct kgsl_clk_data clk; /* imem_clk_name is for 3d only, not used in 2d devices */ struct kgsl_grp_clk_name imem_clk_name; + const char *iommu_user_ctx_name; + const char *iommu_priv_ctx_name; }; #endif @@ -454,6 +458,30 @@ struct kgsl_cff_syncmem { #define IOCTL_KGSL_CFF_SYNCMEM \ _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) +/* +* A timestamp event allows the user space to register an action following an +* expired timestamp. 
+*/ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void *priv; /* Pointer to the event specific blob */ + size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + #ifdef __KERNEL__ #ifdef CONFIG_MSM_KGSL_DRM int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, diff --git a/include/linux/msm_q6vdec.h b/include/linux/msm_q6vdec.h index ed7cf782..3b1d0930 100644 --- a/include/linux/msm_q6vdec.h +++ b/include/linux/msm_q6vdec.h @@ -46,6 +46,8 @@ #define VDEC_IOCTL_GETDECATTRIBUTES _IOR(VDEC_IOCTL_MAGIC, 10, \ struct vdec_dec_attributes) #define VDEC_IOCTL_GETVERSION _IOR(VDEC_IOCTL_MAGIC, 11, struct vdec_version) +#define VDEC_IOCTL_PERFORMANCE_CHANGE_REQ _IOW(VDEC_IOCTL_MAGIC, 14, \ + unsigned int) enum { VDEC_FRAME_DECODE_OK, @@ -83,6 +85,13 @@ enum { VDEC_COLOR_FORMAT_NV21_YAMOTO = 0x02 }; +enum { + PERF_REQUEST_SET_MIN = 0, + PERF_REQUEST_LOWER, + PERF_REQUEST_RAISE, + PERF_REQUEST_SET_MAX +}; + struct vdec_input_buf_info { u32 offset; u32 data; diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 265729ec..1cc93892 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -1,6 +1,8 @@ /* interface for the pm_qos_power infrastructure of the linux kernel. * * Mark Gross + * + * Copyright (c) 2010, Code Aurora Forum. All rights reserved. 
*/ #ifndef __PM_QOS_PARAMS_H__ #define __PM_QOS_PARAMS_H__ @@ -57,3 +59,4 @@ int pm_qos_add_notifier(int qos, struct notifier_block *notifier); int pm_qos_remove_notifier(int qos, struct notifier_block *notifier); #endif /* __PM_QOS_PARAMS_H__ */ + diff --git a/lib/Makefile b/lib/Makefile index 019e8ef7..11003485 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o flex_array.o memcopy.o + is_single_threaded.o plist.o decompress.o flex_array.o memcopy.o \ + memory_alloc.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -38,9 +39,10 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o -obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o +lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o diff --git a/mm/ashmem.c b/mm/ashmem.c index a16f3f7c..5e059283 100644 --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,10 +29,9 @@ #include #include #include -#include -#define ASHMEM_NAME_PREFIX "dev/ashmem/" -#define ASHMEM_NAME_PREFIX_LEN (sizeof(ASHMEM_NAME_PREFIX) - 1) +#define ASHMEM_NAME_PREFIX "" +#define ASHMEM_NAME_PREFIX_LEN 0 #define ASHMEM_FULL_NAME_LEN (ASHMEM_NAME_LEN + ASHMEM_NAME_PREFIX_LEN) /* @@ -46,8 +45,6 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ - unsigned long vm_start; /* Start address of vm_area - * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -181,7 +178,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = generic_file_open(inode, file); + ret = nonseekable_open(inode, file); if (unlikely(ret)) return ret; @@ -190,7 +187,6 @@ static int ashmem_open(struct inode *inode, struct file *file) return -ENOMEM; INIT_LIST_HEAD(&asma->unpinned_list); - memcpy(asma->name, ASHMEM_NAME_PREFIX, ASHMEM_NAME_PREFIX_LEN); asma->prot_mask = PROT_MASK; file->private_data = asma; @@ -214,67 +210,6 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } -static ssize_t ashmem_read(struct file *file, char __user *buf, - size_t len, loff_t *pos) -{ - struct ashmem_area *asma = file->private_data; - int ret = 0; - - mutex_lock(&ashmem_mutex); - - /* If size is not set, or set to 0, always return EOF. 
*/ - if (asma->size == 0) { - goto out; - } - - if (!asma->file) { - ret = -EBADF; - goto out; - } - - ret = asma->file->f_op->read(asma->file, buf, len, pos); - if (ret < 0) { - goto out; - } - - /** Update backing file pos, since f_ops->read() doesn't */ - asma->file->f_pos = *pos; - -out: - mutex_unlock(&ashmem_mutex); - return ret; -} - -static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) -{ - struct ashmem_area *asma = file->private_data; - int ret; - - mutex_lock(&ashmem_mutex); - - if (asma->size == 0) { - ret = -EINVAL; - goto out; - } - - if (!asma->file) { - ret = -EBADF; - goto out; - } - - ret = asma->file->f_op->llseek(asma->file, offset, origin); - if (ret < 0) { - goto out; - } - - /** Copy f_pos from backing file, since f_ops->llseek() sets it */ - file->f_pos = asma->file->f_pos; - -out: - mutex_unlock(&ashmem_mutex); - return ret; -} - static inline unsigned long calc_vm_may_flags(unsigned long prot) { @@ -329,7 +264,6 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; - asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -351,7 +285,7 @@ out: * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' * pages freed. */ -static int ashmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask) +static int ashmem_shrink(int nr_to_scan, gfp_t gfp_mask) { struct ashmem_range *range, *next; @@ -630,69 +564,6 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } -#ifdef CONFIG_OUTER_CACHE -static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) -{ - unsigned int physaddr = 0; - pgd_t *pgd_ptr = NULL; - pmd_t *pmd_ptr = NULL; - pte_t *pte_ptr = NULL, pte; - - spin_lock(¤t->mm->page_table_lock); - pgd_ptr = pgd_offset(current->mm, virtaddr); - if (pgd_none(*pgd) || pgd_bad(*pgd)) { - pr_err("Failed to convert virtaddr %x to pgd_ptr\n", - virtaddr); - goto done; - } - - pmd_ptr = pmd_offset(pgd_ptr, virtaddr); - if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { - pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", - (void *)pgd_ptr); - goto done; - } - - pte_ptr = pte_offset_map(pmd_ptr, virtaddr); - if (!pte_ptr) { - pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", - (void *)pmd_ptr); - goto done; - } - pte = *pte_ptr; - physaddr = pte_pfn(pte); - pte_unmap(pte_ptr); -done: - spin_unlock(¤t->mm->page_table_lock); - physaddr <<= PAGE_SHIFT; - return physaddr; -} -#endif - -static int ashmem_cache_op(struct ashmem_area *asma, - void (*cache_func)(unsigned long vstart, unsigned long length, - unsigned long pstart)) -{ -#ifdef CONFIG_OUTER_CACHE - unsigned long vaddr; -#endif - mutex_lock(&ashmem_mutex); -#ifndef CONFIG_OUTER_CACHE - cache_func(asma->vm_start, asma->size, 0); -#else - for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; - vaddr += PAGE_SIZE) { - unsigned long physaddr; - physaddr = virtaddr_to_physaddr(vaddr); - if (!physaddr) - return -EINVAL; - cache_func(vaddr, PAGE_SIZE, physaddr); - } -#endif - mutex_unlock(&ashmem_mutex); - return 0; -} - static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -729,19 +600,10 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case ASHMEM_PURGE_ALL_CACHES: ret = -EPERM; if (capable(CAP_SYS_ADMIN)) { - ret = ashmem_shrink(&ashmem_shrinker, 0, GFP_KERNEL); - ashmem_shrink(&ashmem_shrinker, ret, GFP_KERNEL); + ret = 
ashmem_shrink(0, GFP_KERNEL); + ashmem_shrink(ret, GFP_KERNEL); } break; - case ASHMEM_CACHE_FLUSH_RANGE: - ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); - break; - case ASHMEM_CACHE_CLEAN_RANGE: - ret = ashmem_cache_op(asma, &clean_caches); - break; - case ASHMEM_CACHE_INV_RANGE: - ret = ashmem_cache_op(asma, &invalidate_caches); - break; } return ret; @@ -804,8 +666,6 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, - .read = ashmem_read, - .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, From be86226379024c75722981b22936160791166a2e Mon Sep 17 00:00:00 2001 From: Shantanu Gupta Date: Mon, 14 May 2012 02:47:02 +0530 Subject: [PATCH 006/155] [KGSL] add missing files for last commit --- arch/arm/include/asm/asm-offsets.h | 1 + arch/arm/include/asm/outercache.h | 75 + .../include/mach/internal_power_rail.h | 63 + arch/arm/mach-msm/include/mach/msm_memtypes.h | 64 + drivers/gpu/msm/a2xx_reg.h | 418 +++++ drivers/gpu/msm/adreno_a2xx.c | 1607 +++++++++++++++++ drivers/gpu/msm/kgsl_gpummu.c | 766 ++++++++ drivers/gpu/msm/kgsl_gpummu.h | 85 + drivers/gpu/msm/kgsl_iommu.c | 333 ++++ drivers/gpu/msm/kgsl_pwrscale_idlestats.c | 221 +++ drivers/gpu/msm/kgsl_pwrscale_trustzone.c | 197 ++ include/drm/kgsl_drm.h | 221 +++ 12 files changed, 4051 insertions(+) create mode 100644 arch/arm/include/asm/asm-offsets.h create mode 100644 arch/arm/include/asm/outercache.h create mode 100644 arch/arm/mach-msm/include/mach/internal_power_rail.h create mode 100644 arch/arm/mach-msm/include/mach/msm_memtypes.h create mode 100644 drivers/gpu/msm/a2xx_reg.h create mode 100644 drivers/gpu/msm/adreno_a2xx.c create mode 100644 drivers/gpu/msm/kgsl_gpummu.c create mode 100644 drivers/gpu/msm/kgsl_gpummu.h create mode 100644 drivers/gpu/msm/kgsl_iommu.c create mode 100644 drivers/gpu/msm/kgsl_pwrscale_idlestats.c create mode 100644 drivers/gpu/msm/kgsl_pwrscale_trustzone.c create mode 100644 include/drm/kgsl_drm.h diff --git a/arch/arm/include/asm/asm-offsets.h b/arch/arm/include/asm/asm-offsets.h new file mode 100644 index 00000000..d370ee36 --- /dev/null +++ b/arch/arm/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h new file mode 100644 index 00000000..25f76bae --- /dev/null +++ b/arch/arm/include/asm/outercache.h @@ -0,0 +1,75 @@ +/* + * arch/arm/include/asm/outercache.h + * + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_OUTERCACHE_H +#define __ASM_OUTERCACHE_H + +struct outer_cache_fns { + void (*inv_range)(unsigned long, unsigned long); + void (*clean_range)(unsigned long, unsigned long); + void (*flush_range)(unsigned long, unsigned long); +#ifdef CONFIG_OUTER_CACHE_SYNC + void (*sync)(void); +#endif +}; + +#ifdef CONFIG_OUTER_CACHE + +extern struct outer_cache_fns outer_cache; + +static inline void outer_inv_range(unsigned long start, unsigned long end) +{ + if (outer_cache.inv_range) + outer_cache.inv_range(start, end); +} +static inline void outer_clean_range(unsigned long start, unsigned long end) +{ + if (outer_cache.clean_range) + outer_cache.clean_range(start, end); +} +static inline void outer_flush_range(unsigned long start, unsigned long end) +{ + if (outer_cache.flush_range) + outer_cache.flush_range(start, end); +} + +#else + +static inline void outer_inv_range(unsigned long start, unsigned long end) +{ } +static inline void outer_clean_range(unsigned long start, unsigned long end) +{ } +static inline void outer_flush_range(unsigned long start, unsigned long end) +{ } + +#endif + +#ifdef CONFIG_OUTER_CACHE_SYNC +static inline void outer_sync(void) +{ + if (outer_cache.sync) + outer_cache.sync(); +} +#else +static inline void outer_sync(void) +{ } +#endif + +#endif /* __ASM_OUTERCACHE_H */ diff --git a/arch/arm/mach-msm/include/mach/internal_power_rail.h b/arch/arm/mach-msm/include/mach/internal_power_rail.h new file mode 100644 index 00000000..f489dc57 --- /dev/null +++ b/arch/arm/mach-msm/include/mach/internal_power_rail.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2009, Code Aurora Forum. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of Code Aurora Forum, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _INTERNAL_POWER_RAIL_H +#define _INTERNAL_POWER_RAIL_H + +/* Clock power rail IDs */ +#define PWR_RAIL_GRP_CLK 8 +#define PWR_RAIL_GRP_2D_CLK 58 +#define PWR_RAIL_MDP_CLK 14 +#define PWR_RAIL_MFC_CLK 68 +#define PWR_RAIL_ROTATOR_CLK 90 +#define PWR_RAIL_VDC_CLK 39 +#define PWR_RAIL_VFE_CLK 41 +#define PWR_RAIL_VPE_CLK 76 + +enum rail_ctl_mode { + PWR_RAIL_CTL_AUTO = 0, + PWR_RAIL_CTL_MANUAL, +}; + +static inline int __maybe_unused internal_pwr_rail_ctl(unsigned rail_id, + bool enable) +{ + /* Not yet implemented. */ + return 0; +} +static inline int __maybe_unused internal_pwr_rail_mode(unsigned rail_id, + enum rail_ctl_mode mode) +{ + /* Not yet implemented. */ + return 0; +} + +int internal_pwr_rail_ctl_auto(unsigned rail_id, bool enable); + +#endif /* _INTERNAL_POWER_RAIL_H */ + diff --git a/arch/arm/mach-msm/include/mach/msm_memtypes.h b/arch/arm/mach-msm/include/mach/msm_memtypes.h new file mode 100644 index 00000000..963f25c1 --- /dev/null +++ b/arch/arm/mach-msm/include/mach/msm_memtypes.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +*/ + +/* The MSM Hardware supports multiple flavors of physical memory. + * This file captures hardware specific information of these types. +*/ + +#ifndef __ASM_ARCH_MSM_MEMTYPES_H +#define __ASM_ARCH_MSM_MEMTYPES_H + +#include +#include +int __init meminfo_init(unsigned int, unsigned int); +/* Redundant check to prevent this from being included outside of 7x30 */ +#if defined(CONFIG_ARCH_MSM7X30) +unsigned int get_num_populated_chipselects(void); +#endif + +unsigned int get_num_memory_banks(void); +unsigned int get_memory_bank_size(unsigned int); +unsigned int get_memory_bank_start(unsigned int); +int soc_change_memory_power(u64, u64, int); + +enum { + MEMTYPE_NONE = -1, + MEMTYPE_SMI_KERNEL = 0, + MEMTYPE_SMI, + MEMTYPE_EBI0, + MEMTYPE_EBI1, + MEMTYPE_MAX, +}; + +void msm_reserve(void); + +#define MEMTYPE_FLAGS_FIXED 0x1 +#define MEMTYPE_FLAGS_1M_ALIGN 0x2 + +struct memtype_reserve { + unsigned long start; + unsigned long size; + unsigned long limit; + int flags; +}; + +struct reserve_info { + struct memtype_reserve *memtype_reserve_table; + void (*calculate_reserve_sizes)(void); + int (*paddr_to_memtype)(unsigned int); + unsigned long low_unstable_address; + unsigned long max_unstable_size; + unsigned long bank_size; +}; + +extern struct reserve_info *reserve_info; +#endif diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h new file mode 100644 index 00000000..d859d61c --- /dev/null +++ b/drivers/gpu/msm/a2xx_reg.h @@ -0,0 +1,418 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ +#ifndef __A200_REG_H +#define __A200_REG_H + +enum VGT_EVENT_TYPE { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, +}; + +enum COLORFORMATX { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 = 13, + COLORX_8_8_8 = 14, +}; + +enum SURFACEFORMAT { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_10_11_11 = 16, + FMT_11_11_10 = 17, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_24_8 = 22, + FMT_24_8_FLOAT = 23, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_32_AS_8 = 39, + FMT_32_AS_8_8 = 40, + FMT_16_MPEG = 41, + FMT_16_16_MPEG = 42, + FMT_8_INTERLACED = 43, + FMT_32_AS_8_INTERLACED = 44, + FMT_32_AS_8_8_INTERLACED = 45, + FMT_16_INTERLACED = 46, + FMT_16_MPEG_INTERLACED = 47, + FMT_16_16_MPEG_INTERLACED = 48, + FMT_DXN = 49, + FMT_8_8_8_8_AS_16_16_16_16 = 50, + FMT_DXT1_AS_16_16_16_16 = 51, + FMT_DXT2_3_AS_16_16_16_16 = 52, + FMT_DXT4_5_AS_16_16_16_16 = 53, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_11_11_AS_16_16_16_16 = 55, + FMT_11_11_10_AS_16_16_16_16 = 56, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, + FMT_DXT3A_AS_1_1_1_1 = 61 +}; + +#define REG_PERF_MODE_CNT 0x0 +#define REG_PERF_STATE_RESET 0x0 +#define REG_PERF_STATE_ENABLE 0x1 +#define REG_PERF_STATE_FREEZE 0x2 + +#define RB_EDRAM_INFO_EDRAM_SIZE_SIZE 4 +#define RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE 2 +#define RB_EDRAM_INFO_UNUSED0_SIZE 8 +#define RB_EDRAM_INFO_EDRAM_RANGE_SIZE 18 + +struct rb_edram_info_t { + unsigned int edram_size:RB_EDRAM_INFO_EDRAM_SIZE_SIZE; + unsigned int edram_mapping_mode:RB_EDRAM_INFO_EDRAM_MAPPING_MODE_SIZE; + unsigned int unused0:RB_EDRAM_INFO_UNUSED0_SIZE; + unsigned int edram_range:RB_EDRAM_INFO_EDRAM_RANGE_SIZE; +}; + +union reg_rb_edram_info { + unsigned int val; + struct rb_edram_info_t f; +}; + +#define RBBM_READ_ERROR_UNUSED0_SIZE 2 +#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 +#define RBBM_READ_ERROR_UNUSED1_SIZE 13 +#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 +#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 + +struct rbbm_read_error_t { + unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; + unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; + unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; + unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; + unsigned int 
read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; +}; + +union rbbm_read_error_u { + unsigned int val:32; + struct rbbm_read_error_t f; +}; + +#define CP_RB_CNTL_RB_BUFSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED0_SIZE 2 +#define CP_RB_CNTL_RB_BLKSZ_SIZE 6 +#define CP_RB_CNTL_UNUSED1_SIZE 2 +#define CP_RB_CNTL_BUF_SWAP_SIZE 2 +#define CP_RB_CNTL_UNUSED2_SIZE 2 +#define CP_RB_CNTL_RB_POLL_EN_SIZE 1 +#define CP_RB_CNTL_UNUSED3_SIZE 6 +#define CP_RB_CNTL_RB_NO_UPDATE_SIZE 1 +#define CP_RB_CNTL_UNUSED4_SIZE 3 +#define CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE 1 + +struct cp_rb_cntl_t { + unsigned int rb_bufsz:CP_RB_CNTL_RB_BUFSZ_SIZE; + unsigned int unused0:CP_RB_CNTL_UNUSED0_SIZE; + unsigned int rb_blksz:CP_RB_CNTL_RB_BLKSZ_SIZE; + unsigned int unused1:CP_RB_CNTL_UNUSED1_SIZE; + unsigned int buf_swap:CP_RB_CNTL_BUF_SWAP_SIZE; + unsigned int unused2:CP_RB_CNTL_UNUSED2_SIZE; + unsigned int rb_poll_en:CP_RB_CNTL_RB_POLL_EN_SIZE; + unsigned int unused3:CP_RB_CNTL_UNUSED3_SIZE; + unsigned int rb_no_update:CP_RB_CNTL_RB_NO_UPDATE_SIZE; + unsigned int unused4:CP_RB_CNTL_UNUSED4_SIZE; + unsigned int rb_rptr_wr_ena:CP_RB_CNTL_RB_RPTR_WR_ENA_SIZE; +}; + +union reg_cp_rb_cntl { + unsigned int val:32; + struct cp_rb_cntl_t f; +}; + +#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL +#define RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT 0x00000004 + + +#define SQ_INT_CNTL__PS_WATCHDOG_MASK 0x00000001L +#define SQ_INT_CNTL__VS_WATCHDOG_MASK 0x00000002L + +#define RBBM_INT_CNTL__RDERR_INT_MASK 0x00000001L +#define RBBM_INT_CNTL__DISPLAY_UPDATE_INT_MASK 0x00000002L +#define RBBM_INT_CNTL__GUI_IDLE_INT_MASK 0x00080000L + +#define RBBM_STATUS__CMDFIFO_AVAIL_MASK 0x0000001fL +#define RBBM_STATUS__TC_BUSY_MASK 0x00000020L +#define RBBM_STATUS__HIRQ_PENDING_MASK 0x00000100L +#define RBBM_STATUS__CPRQ_PENDING_MASK 0x00000200L +#define RBBM_STATUS__CFRQ_PENDING_MASK 0x00000400L +#define RBBM_STATUS__PFRQ_PENDING_MASK 0x00000800L +#define RBBM_STATUS__VGT_BUSY_NO_DMA_MASK 0x00001000L +#define RBBM_STATUS__RBBM_WU_BUSY_MASK 0x00004000L +#define RBBM_STATUS__CP_NRT_BUSY_MASK 0x00010000L +#define RBBM_STATUS__MH_BUSY_MASK 0x00040000L +#define RBBM_STATUS__MH_COHERENCY_BUSY_MASK 0x00080000L +#define RBBM_STATUS__SX_BUSY_MASK 0x00200000L +#define RBBM_STATUS__TPC_BUSY_MASK 0x00400000L +#define RBBM_STATUS__SC_CNTX_BUSY_MASK 0x01000000L +#define RBBM_STATUS__PA_BUSY_MASK 0x02000000L +#define RBBM_STATUS__VGT_BUSY_MASK 0x04000000L +#define RBBM_STATUS__SQ_CNTX17_BUSY_MASK 0x08000000L +#define RBBM_STATUS__SQ_CNTX0_BUSY_MASK 0x10000000L +#define RBBM_STATUS__RB_CNTX_BUSY_MASK 0x40000000L +#define RBBM_STATUS__GUI_ACTIVE_MASK 0x80000000L + +#define CP_INT_CNTL__SW_INT_MASK 0x00080000L +#define CP_INT_CNTL__T0_PACKET_IN_IB_MASK 0x00800000L +#define CP_INT_CNTL__OPCODE_ERROR_MASK 0x01000000L +#define CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK 0x02000000L +#define CP_INT_CNTL__RESERVED_BIT_ERROR_MASK 0x04000000L +#define CP_INT_CNTL__IB_ERROR_MASK 0x08000000L +#define CP_INT_CNTL__IB2_INT_MASK 0x20000000L +#define CP_INT_CNTL__IB1_INT_MASK 0x40000000L +#define CP_INT_CNTL__RB_INT_MASK 0x80000000L + +#define MASTER_INT_SIGNAL__MH_INT_STAT 0x00000020L +#define MASTER_INT_SIGNAL__SQ_INT_STAT 0x04000000L +#define MASTER_INT_SIGNAL__CP_INT_STAT 0x40000000L +#define MASTER_INT_SIGNAL__RBBM_INT_STAT 0x80000000L + +#define RB_EDRAM_INFO__EDRAM_SIZE_MASK 0x0000000fL +#define RB_EDRAM_INFO__EDRAM_RANGE_MASK 0xffffc000L + +#define MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT 0x00000006 +#define MH_ARBITER_CONFIG__L1_ARB_ENABLE__SHIFT 0x00000007 +#define 
MH_ARBITER_CONFIG__L1_ARB_HOLD_ENABLE__SHIFT 0x00000008 +#define MH_ARBITER_CONFIG__L2_ARB_CONTROL__SHIFT 0x00000009 +#define MH_ARBITER_CONFIG__PAGE_SIZE__SHIFT 0x0000000a +#define MH_ARBITER_CONFIG__TC_REORDER_ENABLE__SHIFT 0x0000000d +#define MH_ARBITER_CONFIG__TC_ARB_HOLD_ENABLE__SHIFT 0x0000000e +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT_ENABLE__SHIFT 0x0000000f +#define MH_ARBITER_CONFIG__IN_FLIGHT_LIMIT__SHIFT 0x00000010 +#define MH_ARBITER_CONFIG__CP_CLNT_ENABLE__SHIFT 0x00000016 +#define MH_ARBITER_CONFIG__VGT_CLNT_ENABLE__SHIFT 0x00000017 +#define MH_ARBITER_CONFIG__TC_CLNT_ENABLE__SHIFT 0x00000018 +#define MH_ARBITER_CONFIG__RB_CLNT_ENABLE__SHIFT 0x00000019 +#define MH_ARBITER_CONFIG__PA_CLNT_ENABLE__SHIFT 0x0000001a + +#define CP_RB_CNTL__RB_BUFSZ__SHIFT 0x00000000 +#define CP_RB_CNTL__RB_BLKSZ__SHIFT 0x00000008 +#define CP_RB_CNTL__RB_POLL_EN__SHIFT 0x00000014 +#define CP_RB_CNTL__RB_NO_UPDATE__SHIFT 0x0000001b + +#define RB_COLOR_INFO__COLOR_FORMAT__SHIFT 0x00000000 +#define RB_EDRAM_INFO__EDRAM_MAPPING_MODE__SHIFT 0x00000004 +#define RB_EDRAM_INFO__EDRAM_RANGE__SHIFT 0x0000000e + +#define REG_CP_CSQ_IB1_STAT 0x01FE +#define REG_CP_CSQ_IB2_STAT 0x01FF +#define REG_CP_CSQ_RB_STAT 0x01FD +#define REG_CP_DEBUG 0x01FC +#define REG_CP_IB1_BASE 0x0458 +#define REG_CP_IB1_BUFSZ 0x0459 +#define REG_CP_IB2_BASE 0x045A +#define REG_CP_IB2_BUFSZ 0x045B +#define REG_CP_INT_ACK 0x01F4 +#define REG_CP_INT_CNTL 0x01F2 +#define REG_CP_INT_STATUS 0x01F3 +#define REG_CP_ME_CNTL 0x01F6 +#define REG_CP_ME_RAM_DATA 0x01FA +#define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_STATUS 0x01F7 +#define REG_CP_PFP_UCODE_ADDR 0x00C0 +#define REG_CP_PFP_UCODE_DATA 0x00C1 +#define REG_CP_QUEUE_THRESHOLDS 0x01D5 +#define REG_CP_RB_BASE 0x01C0 +#define REG_CP_RB_CNTL 0x01C1 +#define REG_CP_RB_RPTR 0x01C4 +#define REG_CP_RB_RPTR_ADDR 0x01C3 +#define REG_CP_RB_RPTR_WR 0x01C7 +#define REG_CP_RB_WPTR 0x01C5 +#define REG_CP_RB_WPTR_BASE 0x01C8 +#define REG_CP_RB_WPTR_DELAY 0x01C6 +#define REG_CP_STAT 0x047F +#define REG_CP_STATE_DEBUG_DATA 0x01ED +#define REG_CP_STATE_DEBUG_INDEX 0x01EC +#define REG_CP_ST_BASE 0x044D +#define REG_CP_ST_BUFSZ 0x044E + +#define REG_CP_PERFMON_CNTL 0x0444 +#define REG_CP_PERFCOUNTER_SELECT 0x0445 +#define REG_CP_PERFCOUNTER_LO 0x0446 +#define REG_CP_PERFCOUNTER_HI 0x0447 + +#define REG_RBBM_PERFCOUNTER1_SELECT 0x0395 +#define REG_RBBM_PERFCOUNTER1_HI 0x0398 +#define REG_RBBM_PERFCOUNTER1_LO 0x0397 + +#define REG_MASTER_INT_SIGNAL 0x03B7 + +#define REG_PA_CL_VPORT_XSCALE 0x210F +#define REG_PA_CL_VPORT_ZOFFSET 0x2114 +#define REG_PA_CL_VPORT_ZSCALE 0x2113 +#define REG_PA_CL_CLIP_CNTL 0x2204 +#define REG_PA_CL_VTE_CNTL 0x2206 +#define REG_PA_SC_AA_MASK 0x2312 +#define REG_PA_SC_LINE_CNTL 0x2300 +#define REG_PA_SC_SCREEN_SCISSOR_BR 0x200F +#define REG_PA_SC_SCREEN_SCISSOR_TL 0x200E +#define REG_PA_SC_VIZ_QUERY 0x2293 +#define REG_PA_SC_VIZ_QUERY_STATUS 0x0C44 +#define REG_PA_SC_WINDOW_OFFSET 0x2080 +#define REG_PA_SC_WINDOW_SCISSOR_BR 0x2082 +#define REG_PA_SC_WINDOW_SCISSOR_TL 0x2081 +#define REG_PA_SU_FACE_DATA 0x0C86 +#define REG_PA_SU_POINT_SIZE 0x2280 +#define REG_PA_SU_LINE_CNTL 0x2282 +#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET 0x2383 +#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE 0x2380 +#define REG_PA_SU_SC_MODE_CNTL 0x2205 + +#define REG_PC_INDEX_OFFSET 0x2102 + +#define REG_RBBM_CNTL 0x003B +#define REG_RBBM_INT_ACK 0x03B6 +#define REG_RBBM_INT_CNTL 0x03B4 +#define REG_RBBM_INT_STATUS 0x03B5 +#define REG_RBBM_PATCH_RELEASE 0x0001 +#define REG_RBBM_PERIPHID1 0x03F9 +#define 
REG_RBBM_PERIPHID2 0x03FA +#define REG_RBBM_DEBUG 0x039B +#define REG_RBBM_DEBUG_OUT 0x03A0 +#define REG_RBBM_DEBUG_CNTL 0x03A1 +#define REG_RBBM_PM_OVERRIDE1 0x039C +#define REG_RBBM_PM_OVERRIDE2 0x039D +#define REG_RBBM_READ_ERROR 0x03B3 +#define REG_RBBM_SOFT_RESET 0x003C +#define REG_RBBM_STATUS 0x05D0 + +#define REG_RB_COLORCONTROL 0x2202 +#define REG_RB_COLOR_DEST_MASK 0x2326 +#define REG_RB_COLOR_MASK 0x2104 +#define REG_RB_COPY_CONTROL 0x2318 +#define REG_RB_DEPTHCONTROL 0x2200 +#define REG_RB_EDRAM_INFO 0x0F02 +#define REG_RB_MODECONTROL 0x2208 +#define REG_RB_SURFACE_INFO 0x2000 +#define REG_RB_SAMPLE_POS 0x220a + +#define REG_SCRATCH_ADDR 0x01DD +#define REG_SCRATCH_REG0 0x0578 +#define REG_SCRATCH_REG2 0x057A +#define REG_SCRATCH_UMSK 0x01DC + +#define REG_SQ_CF_BOOLEANS 0x4900 +#define REG_SQ_CF_LOOP 0x4908 +#define REG_SQ_GPR_MANAGEMENT 0x0D00 +#define REG_SQ_FLOW_CONTROL 0x0D01 +#define REG_SQ_INST_STORE_MANAGMENT 0x0D02 +#define REG_SQ_INT_ACK 0x0D36 +#define REG_SQ_INT_CNTL 0x0D34 +#define REG_SQ_INT_STATUS 0x0D35 +#define REG_SQ_PROGRAM_CNTL 0x2180 +#define REG_SQ_PS_PROGRAM 0x21F6 +#define REG_SQ_VS_PROGRAM 0x21F7 +#define REG_SQ_WRAPPING_0 0x2183 +#define REG_SQ_WRAPPING_1 0x2184 + +#define REG_VGT_ENHANCE 0x2294 +#define REG_VGT_INDX_OFFSET 0x2102 +#define REG_VGT_MAX_VTX_INDX 0x2100 +#define REG_VGT_MIN_VTX_INDX 0x2101 + +#define REG_TP0_CHICKEN 0x0E1E +#define REG_TC_CNTL_STATUS 0x0E00 +#define REG_PA_SC_AA_CONFIG 0x2301 +#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL 0x2316 +#define REG_SQ_INTERPOLATOR_CNTL 0x2182 +#define REG_RB_DEPTH_INFO 0x2002 +#define REG_COHER_DEST_BASE_0 0x2006 +#define REG_RB_FOG_COLOR 0x2109 +#define REG_RB_STENCILREFMASK_BF 0x210C +#define REG_PA_SC_LINE_STIPPLE 0x2283 +#define REG_SQ_PS_CONST 0x2308 +#define REG_RB_DEPTH_CLEAR 0x231D +#define REG_RB_SAMPLE_COUNT_CTL 0x2324 +#define REG_SQ_CONSTANT_0 0x4000 +#define REG_SQ_FETCH_0 0x4800 + +#define REG_COHER_BASE_PM4 0xA2A +#define REG_COHER_STATUS_PM4 0xA2B +#define REG_COHER_SIZE_PM4 0xA29 + +/*registers added in adreno220*/ +#define REG_A220_PC_INDX_OFFSET REG_VGT_INDX_OFFSET +#define REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL REG_VGT_VERTEX_REUSE_BLOCK_CNTL +#define REG_A220_PC_MAX_VTX_INDX REG_VGT_MAX_VTX_INDX +#define REG_A220_RB_LRZ_VSC_CONTROL 0x2209 +#define REG_A220_GRAS_CONTROL 0x2210 +#define REG_A220_VSC_BIN_SIZE 0x0C01 +#define REG_A220_VSC_PIPE_DATA_LENGTH_7 0x0C1D + +/*registers added in adreno225*/ +#define REG_A225_RB_COLOR_INFO3 0x2005 +#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103 +#define REG_A225_GRAS_UCP0X 0x2340 +#define REG_A225_GRAS_UCP_ENABLED 0x2360 + +#endif /* __A200_REG_H */ diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c new file mode 100644 index 00000000..064b05e9 --- /dev/null +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -0,0 +1,1607 @@ +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "adreno.h" + +/* + * + * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers + * (34.5KB) + * + * +---------------------+------------+-------------+---+---------------------+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow | + * +---------------------+------------+-------------+---+---------------------+ + * ________________________________/ \____________________ + * / | + * +--------------+-----------+------+-----------+------------------------+ + * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused | + * +--------------+-----------+------+-----------+------------------------+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 4K - Command and Vertex Buffers + * - Indirect command buffer : Const/Reg restore + * - includes Loop & Bool const shadows + * - Indirect command buffer : Const/Reg save + * - Quad vertices & texture coordinates + * - Indirect command buffer : Gmem save + * - Indirect command buffer : Gmem restore + * - Unused (padding to 8KB boundary) + * <1K - Texture Constant Shadow (768 bytes) (8K aligned) + * 18K - Shader Instruction Shadow + * - 6K vertex (32 byte aligned) + * - 6K pixel (32 byte aligned) + * - 6K shared (32 byte aligned) + * + * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes + * 3 DWORDS per DWORD transfered, plus 1 DWORD for the shadow, for a total of + * 16 bytes per constant. If the texture constants were transfered this way, + * the Command & Vertex Buffers section would extend past the 16K boundary. + * By moving the texture constant shadow area to start at 16KB boundary, we + * only require approximately 40 bytes more memory, but are able to use the + * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up + * context switching. + * + * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool + * constants would require an additional 8KB each, for alignment.] 
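+ *
+ * (As a rough worked example of the figure above: one REG_TO_MEM copy is
+ * 3 command DWORDS plus the 1 DWORD written into the shadow, i.e. 4 DWORDS
+ * or 16 bytes per constant, so the 32*6 = 192 texture constant DWORDS
+ * would take roughly 192 * 16 = 3072 bytes if they were shadowed that way.)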
+ *
+ */
+
+/* Constants */
+
+#define ALU_CONSTANTS 2048 /* DWORDS */
+#define NUM_REGISTERS 1024 /* DWORDS */
+#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
+#define CMD_BUFFER_LEN 9216 /* DWORDS */
+#else
+#define CMD_BUFFER_LEN 3072 /* DWORDS */
+#endif
+#define TEX_CONSTANTS (32*6) /* DWORDS */
+#define BOOL_CONSTANTS 8 /* DWORDS */
+#define LOOP_CONSTANTS 56 /* DWORDS */
+#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */
+
+/* 96-bit instructions */
+#define SHADER_INSTRUCT (1 << SHADER_INSTRUCT_LOG2)
+
+/* context save (gmem -> sys) */
+
+/* pre-compiled vertex shader program
+*
+* attribute vec4 P;
+* void main(void)
+* {
+*   gl_Position = P;
+* }
+*/
+#define GMEM2SYS_VTX_PGM_LEN 0x12
+
+static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = {
+	0x00011003, 0x00001000, 0xc2000000,
+	0x00001004, 0x00001000, 0xc4000000,
+	0x00001005, 0x00002000, 0x00000000,
+	0x1cb81000, 0x00398a88, 0x00000003,
+	0x140f803e, 0x00000000, 0xe2010100,
+	0x14000000, 0x00000000, 0xe2000000
+};
+
+/* pre-compiled fragment shader program
+*
+* precision highp float;
+* uniform vec4 clear_color;
+* void main(void)
+* {
+*   gl_FragColor = clear_color;
+* }
+*/
+
+#define GMEM2SYS_FRAG_PGM_LEN 0x0c
+
+static unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = {
+	0x00000000, 0x1002c400, 0x10000000,
+	0x00001003, 0x00002000, 0x00000000,
+	0x140f8000, 0x00000000, 0x22000000,
+	0x14000000, 0x00000000, 0xe2000000
+};
+
+/* context restore (sys -> gmem) */
+/* pre-compiled vertex shader program
+*
+* attribute vec4 position;
+* attribute vec4 texcoord;
+* varying vec4 texcoord0;
+* void main()
+* {
+*   gl_Position = position;
+*   texcoord0 = texcoord;
+* }
+*/
+
+#define SYS2GMEM_VTX_PGM_LEN 0x18
+
+static unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = {
+	0x00052003, 0x00001000, 0xc2000000, 0x00001005,
+	0x00001000, 0xc4000000, 0x00001006, 0x10071000,
+	0x20000000, 0x18981000, 0x0039ba88, 0x00000003,
+	0x12982000, 0x40257b08, 0x00000002, 0x140f803e,
+	0x00000000, 0xe2010100, 0x140f8000, 0x00000000,
+	0xe2020200, 0x14000000, 0x00000000, 0xe2000000
+};
+
+/* pre-compiled fragment shader program
+*
+* precision mediump float;
+* uniform sampler2D tex0;
+* varying vec4 texcoord0;
+* void main()
+* {
+*   gl_FragColor = texture2D(tex0, texcoord0.xy);
+* }
+*/
+
+#define SYS2GMEM_FRAG_PGM_LEN 0x0f
+
+static unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = {
+	0x00011002, 0x00001000, 0xc4000000, 0x00001003,
+	0x10041000, 0x20000000, 0x10000001, 0x1ffff688,
+	0x00000002, 0x140f8000, 0x00000000, 0xe2000000,
+	0x14000000, 0x00000000, 0xe2000000
+};
+
+/* shader texture constants (sysmem -> gmem) */
+#define SYS2GMEM_TEX_CONST_LEN 6
+
+static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = {
+	/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
+	 * RFMode=ZeroClamp-1, Dim=1:2d
+	 */
+	0x00000002,	/* Pitch = TBD */
+
+	/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
+	 * NearestClamp=1:OGL Mode
+	 */
+	0x00000800,	/* Address[31:12] = TBD */
+
+	/* Width, Height, EndianSwap=0:None */
+	0,		/* Width & Height = TBD */
+
+	/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
+	 * Mip=2:BaseMap
+	 */
+	0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23,
+
+	/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
+	 * Dim3d=0
+	 */
+	0,
+
+	/* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
+	 * Dim=1:2d, MipPacking=0
+	 */
+	1 << 9	/* Mip Address[31:12] = TBD */
+};
+
+#define NUM_COLOR_FORMATS 13
+
+static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = {
+	FMT_4_4_4_4, /* 
COLORX_4_4_4_4 */ + FMT_1_5_5_5, /* COLORX_1_5_5_5 */ + FMT_5_6_5, /* COLORX_5_6_5 */ + FMT_8, /* COLORX_8 */ + FMT_8_8, /* COLORX_8_8 */ + FMT_8_8_8_8, /* COLORX_8_8_8_8 */ + FMT_8_8_8_8, /* COLORX_S8_8_8_8 */ + FMT_16_FLOAT, /* COLORX_16_FLOAT */ + FMT_16_16_FLOAT, /* COLORX_16_16_FLOAT */ + FMT_16_16_16_16_FLOAT, /* COLORX_16_16_16_16_FLOAT */ + FMT_32_FLOAT, /* COLORX_32_FLOAT */ + FMT_32_32_FLOAT, /* COLORX_32_32_FLOAT */ + FMT_32_32_32_32_FLOAT, /* COLORX_32_32_32_32_FLOAT */ +}; + +static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = { + 2, /* COLORX_4_4_4_4 */ + 2, /* COLORX_1_5_5_5 */ + 2, /* COLORX_5_6_5 */ + 1, /* COLORX_8 */ + 2, /* COLORX_8_8 8*/ + 4, /* COLORX_8_8_8_8 */ + 4, /* COLORX_S8_8_8_8 */ + 2, /* COLORX_16_FLOAT */ + 4, /* COLORX_16_16_FLOAT */ + 8, /* COLORX_16_16_16_16_FLOAT */ + 4, /* COLORX_32_FLOAT */ + 8, /* COLORX_32_32_FLOAT */ + 16, /* COLORX_32_32_32_32_FLOAT */ +}; + +/* shader linkage info */ +#define SHADER_CONST_ADDR (11 * 6 + 3) + + +static unsigned int *program_shader(unsigned int *cmds, int vtxfrag, + unsigned int *shader_pgm, int dwords) +{ + /* load the patched vertex shader stream */ + *cmds++ = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords); + /* 0=vertex shader, 1=fragment shader */ + *cmds++ = vtxfrag; + /* instruction start & size (in 32-bit words) */ + *cmds++ = ((0 << 16) | dwords); + + memcpy(cmds, shader_pgm, dwords << 2); + cmds += dwords; + + return cmds; +} + +static unsigned int *reg_to_mem(unsigned int *cmds, uint32_t dst, + uint32_t src, int dwords) +{ + while (dwords-- > 0) { + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = src++; + *cmds++ = dst; + dst += 4; + } + + return cmds; +} + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + +static void build_reg_to_mem_range(unsigned int start, unsigned int end, + unsigned int **cmd, + struct adreno_context *drawctxt) +{ + unsigned int i = start; + + for (i = start; i <= end; i++) { + *(*cmd)++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *(*cmd)++ = i; + *(*cmd)++ = + ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) + + (i - 0x2000) * 4; + } +} + +#endif + +/* chicken restore */ +static unsigned int *build_chicken_restore_cmds( + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmds = start; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate); + *cmds++ = 0x00000000; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds); + + return cmds; +} + +/****************************************************************************/ +/* context save */ +/****************************************************************************/ + +static const unsigned int register_ranges_a20x[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_VGT_MAX_VTX_INDX, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL, + REG_PA_SU_POINT_SIZE, REG_PA_SC_LINE_STIPPLE, + REG_PA_SC_VIZ_QUERY, REG_PA_SC_VIZ_QUERY, + REG_VGT_VERTEX_REUSE_BLOCK_CNTL, 
REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a220[] = { + REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A220_PC_INDX_OFFSET, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR +}; + +static const unsigned int register_ranges_a225[] = { + REG_RB_SURFACE_INFO, REG_A225_RB_COLOR_INFO3, + REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR, + REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR, + REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET, + REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1, + REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST, + REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK, + REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK, + REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET, + REG_A220_PC_MAX_VTX_INDX, REG_A225_PC_MULTI_PRIM_IB_RESET_INDX, + REG_RB_COLOR_MASK, REG_RB_FOG_COLOR, + REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL, + REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL, + REG_RB_MODECONTROL, REG_RB_SAMPLE_POS, + REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL, + REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR, + REG_A225_GRAS_UCP0X, REG_A225_GRAS_UCP_ENABLED +}; + + +/* save h/w regs, alu constants, texture contants, etc. ... +* requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr +*/ +static void build_regsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Make sure the HW context has the correct register values + * before reading them. */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + + { + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + /* Based on chip id choose the register ranges */ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + /* Write HW registers into shadow */ + for (i = 0; i < (reg_array_size/2) ; i++) { + build_reg_to_mem_range(ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1], + &cmd, drawctxt); + } + } + + /* Copy ALU constants */ + cmd = + reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, + REG_SQ_CONSTANT_0, ALU_CONSTANTS); + + /* Copy Tex constants */ + cmd = + reg_to_mem(cmd, + (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, + REG_SQ_FETCH_0, TEX_CONSTANTS); +#else + + /* Insert a wait for idle packet before reading the registers. 
+ * This is to fix a hang/reset seen during stress testing. In this + * hang, CP encountered a timeout reading SQ's boolean constant + * register. There is logic in the HW that blocks reading of this + * register when the SQ block is not idle, which we believe is + * contributing to the hang.*/ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/w registers are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* ALU constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; + *cmd++ = 0 << 16; /* ALU, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + + /* Tex constants are already shadowed; just need to disable shadowing + * to prevent corruption. + */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; + *cmd++ = 1 << 16; /* Tex, start=0 */ + *cmd++ = 0x0; /* count = 0 */ +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_GPR_MANAGEMENT; + *cmd++ = tmp_ctx.reg_values[0]; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_TP0_CHICKEN; + *cmd++ = tmp_ctx.reg_values[1]; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = i; + *cmd++ = tmp_ctx.reg_values[j]; + j++; + } + } + + /* Copy Boolean constants */ + cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS, + BOOL_CONSTANTS); + + /* Copy Loop constants */ + cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow, + REG_SQ_CF_LOOP, LOOP_CONSTANTS); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/*copy colour, depth, & stencil buffers from graphics memory to system memory*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_save_commands; + unsigned int *start = cmds; + /* Calculate the new offset based on the adjusted base */ + unsigned int bytesperpixel = format2bytesperpixel[shadow->format]; + unsigned int addr = shadow->gmemshadow.gpuaddr; + unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + + /* program shader */ + + /* load shader vtx constants ... 5 dwords */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR; + *cmds++ = 0; + /* valid(?) 
vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* REG_VGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* REG_VGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* REG_VGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00003F00; + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* load the patched vertex shader stream */ + cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x10018001; + else + *cmds++ = 0x10010001; + *cmds++ = 0x00000008; + + /* resolve */ + + /* PA_CL_VTE_CNTL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + /* gmem base assumed 4K aligned. 
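+	 * With the base 4K aligned its low 12 bits are zero, so the color
+	 * format (RB_COLOR_INFO__COLOR_FORMAT, bits [3:0]) can be OR'ed
+	 * straight into the same word below; the BUG_ON() checks that
+	 * assumption.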
*/ + BUG_ON(tmp_ctx.gmem_base & 0xFFF); + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* disable Z */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 0x08; + else + *cmds++ = 0; + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* Use maximum scissor values -- quad vertices already have the + * correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = (0x1fff << 16) | (0x1fff); + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = (0x1fff << 16) | (0x1fff); + + /* load the viewport so that z scale = clear depth and + * z offset = 0.0f + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; /* -1.0f */ + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + + /* load the COPY state */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(REG_RB_COPY_CONTROL); + *cmds++ = 0; /* RB_COPY_CONTROL */ + *cmds++ = addr & 0xfffff000; /* RB_COPY_DEST_BASE */ + *cmds++ = shadow->pitch >> 5; /* RB_COPY_DEST_PITCH */ + + /* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither, + * MaskWrite:R=G=B=A=1 + */ + *cmds++ = 0x0003c008 | + (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT); + /* Make sure we stay in offsetx field. */ + BUG_ON(offset & 0xfffff000); + *cmds++ = offset; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + *cmds++ = 0x6; /* EDRAM copy */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +/* context restore */ + +/*copy colour, depth, & stencil buffers from system memory to graphics memory*/ +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = shadow->gmem_restore_commands; + unsigned int *start = cmds; + + /* Store TP0_CHICKEN register */ + *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmds++ = REG_TP0_CHICKEN; + *cmds++ = tmp_ctx.chicken_restore; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + + /* Set TP0_CHICKEN to zero */ + *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + *cmds++ = 0x00000000; + + /* Set PA_SC_AA_CONFIG to 0 */ + *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); + *cmds++ = 0x00000000; + /* shader constants */ + + /* vertex buffer constants */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + + *cmds++ = (0x1 << 16) | (9 * 6); + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; + /* limit = 12 dwords */ + *cmds++ = 0x00000030; + /* valid(?) vtx constant flag & addr */ + *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; + /* limit = 8 dwords */ + *cmds++ = 0x00000020; + *cmds++ = 0; + *cmds++ = 0; + + /* Invalidate L2 cache to make sure vertices are updated */ + *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); + *cmds++ = 0x1; + + cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN); + + /* Repartition shaders */ + *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + *cmds++ = 0x180; + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + *cmds++ = (0x80000000) | 0x180; + + /* Load the patched fragment shader stream */ + cmds = + program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN); + + /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); + *cmds++ = 0x10030002; + *cmds++ = 0x00000008; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_MASK); + *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */ + + if (!adreno_is_a22x(adreno_dev)) { + /* PA_SC_VIZ_QUERY */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY); + *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */ + } + + /* RB_COLORCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLORCONTROL); + *cmds++ = 0x00000c20; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); + *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); + *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */ + *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */ + *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL); + *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */ + *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL); + *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SC_AA_CONFIG); + *cmds++ = 
0x00000000; /* REG_PA_SC_AA_CONFIG */ + + /* set REG_PA_SU_SC_MODE_CNTL + * Front_ptype = draw triangles + * Back_ptype = draw triangles + * Provoking vertex = last + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); + *cmds++ = 0x00080240; + + /* texture constants */ + *cmds++ = + cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1)); + *cmds++ = (0x1 << 16) | (0 * 6); + memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2); + cmds[0] |= (shadow->pitch >> 5) << 22; + cmds[1] |= + shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format]; + cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13; + cmds += SYS2GMEM_TEX_CONST_LEN; + + /* program surface info */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_RB_SURFACE_INFO); + *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */ + + /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0, + * Base=gmem_base + */ + *cmds++ = + (shadow-> + format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; + + /* RB_DEPTHCONTROL */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); + + if (adreno_is_a22x(adreno_dev)) + *cmds++ = 8; /* disable Z */ + else + *cmds++ = 0; /* disable Z */ + + /* Use maximum scissor values -- quad vertices already + * have the correct bounds */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); + *cmds++ = (0 << 16) | 0; + *cmds++ = ((0x1fff) << 16) | 0x1fff; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); + *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); + *cmds++ = ((0x1fff) << 16) | 0x1fff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); + /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */ + *cmds++ = 0x00000b00; + + /*load the viewport so that z scale = clear depth and z offset = 0.0f */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); + *cmds++ = 0xbf800000; + *cmds++ = 0x0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_MASK); + *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); + *cmds++ = 0xffffffff; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(REG_SQ_WRAPPING_0); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* load the stencil ref value + * $AAM - do this later + */ + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_RB_MODECONTROL); + /* draw pixels with color and depth/stencil component */ + *cmds++ = 0x4; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); + *cmds++ = 0x00010000; + + if (adreno_is_a22x(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1); + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); + *cmds++ = 0x0000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0; /* viz query info. */ + /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/ + *cmds++ = 0x00004088; + *cmds++ = 3; /* NumIndices=3 */ + } else { + /* queue the draw packet */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); + *cmds++ = 0; /* viz query info. 
*/ + /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */ + *cmds++ = 0x00030088; + } + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + + unsigned int i = 0; + unsigned int reg_array_size = 0; + const unsigned int *ptr_register_ranges; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* H/W Registers */ + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + /* Based on chip id choose the registers ranges*/ + if (adreno_is_a220(adreno_dev)) { + ptr_register_ranges = register_ranges_a220; + reg_array_size = ARRAY_SIZE(register_ranges_a220); + } else if (adreno_is_a225(adreno_dev)) { + ptr_register_ranges = register_ranges_a225; + reg_array_size = ARRAY_SIZE(register_ranges_a225); + } else { + ptr_register_ranges = register_ranges_a20x; + reg_array_size = ARRAY_SIZE(register_ranges_a20x); + } + + + for (i = 0; i < (reg_array_size/2); i++) { + cmd = reg_range(cmd, ptr_register_ranges[i*2], + ptr_register_ranges[i*2+1]); + } + + /* Now we know how many register blocks we have, we can compute command + * length + */ + start[2] = + cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); + /* Enable shadowing for the entire register block. */ +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + start[4] |= (1 << 24) | (4 << 16); +#endif + + /* Need to handle some of the registers separately */ + *cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); + tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00040400; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + *cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1); + tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + + if (adreno_is_a22x(adreno_dev)) { + unsigned int i; + unsigned int j = 2; + for (i = REG_A220_VSC_BIN_SIZE; i <= + REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { + *cmd++ = cp_type0_packet(i, 1); + tmp_ctx.reg_values[j] = virt2gpu(cmd, + &drawctxt->gpustate); + *cmd++ = 0x00000000; + j++; + } + } + + /* ALU Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */ +#else + *cmd++ = (1 << 24) | (0 << 16) | 0; +#endif + *cmd++ = ALU_CONSTANTS; + + /* Texture Constants */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Disable shadowing */ + *cmd++ = (0 << 24) | (1 << 16) | 0; +#else + *cmd++ = (1 << 24) | (1 << 16) | 0; +#endif + *cmd++ = TEX_CONSTANTS; + + /* Boolean Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS); + *cmd++ = (2 << 16) | 0; + + /* the next BOOL_CONSTANT dwords is the shadow area for + * boolean constants. 
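+	 * build_regsave_cmds() points its REG_SQ_CF_BOOLEANS reg_to_mem copy
+	 * at bool_shadow, so the save IB writes the live boolean constants
+	 * straight into this packet body and the restore simply replays them.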
+ */ + tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += BOOL_CONSTANTS; + + /* Loop Constants */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS); + *cmd++ = (3 << 16) | 0; + + /* the next LOOP_CONSTANTS dwords is the shadow area for + * loop constants. + */ + tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate); + cmd += LOOP_CONSTANTS; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static void +build_shader_save_restore_cmds(struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *save, *restore, *fixup; + unsigned int *startSizeVtx, *startSizePix, *startSizeShared; + unsigned int *partition1; + unsigned int *shaderBases, *partition2; + + /* compute vertex, pixel and shared instruction shadow GPU addresses */ + tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; + tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + + /* restore shader partitioning and instructions */ + + restore = cmd; /* start address */ + + /* Invalidate Vertex & Pixel instruction code address and sizes */ + *cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ + + /* Restore previous shader vertex & pixel instruction bases. */ + *cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); + shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); + partition1 = cmd++; /* TBD #4a: partition info (from save) */ + + /* load vertex shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + startSizeVtx = cmd++; /* TBD #1: start/size (from save) */ + + /* load pixel shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + startSizePix = cmd++; /* TBD #2: start/size (from save) */ + + /* load shared shader instructions from the shadow. */ + *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + startSizeShared = cmd++; /* TBD #3: start/size (from save) */ + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); + + /* + * fixup SET_SHADER_BASES data + * + * since self-modifying PM4 code is being used here, a seperate + * command buffer is used for this fixup operation, to ensure the + * commands are not read by the PM4 engine before the data fields + * have been written. + */ + + fixup = cmd; /* start address */ + + /* write the shader partition information to a scratch register */ + *cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1); + partition2 = cmd++; /* TBD #4b: partition info (from save) */ + + /* mask off unused bits, then OR with shader instruction memory size */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = REG_SCRATCH_REG2; + /* AND off invalid bits. 
*/ + *cmd++ = 0x0FFF0FFF; + /* OR in instruction memory size */ + *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + + /* write the computed value to the SET_SHADER_BASES data field */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SCRATCH_REG2; + /* TBD #5: shader bases (to restore) */ + *cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate); + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); + + /* save shader partitioning and instructions */ + + save = cmd; /* start address */ + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* fetch the SQ_INST_STORE_MANAGMENT register value, + * store the value in the data fields of the SET_CONSTANT commands + * above. + */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4a: partition info (to restore) */ + *cmd++ = virt2gpu(partition1, &drawctxt->gpustate); + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = REG_SQ_INST_STORE_MANAGMENT; + /* TBD #4b: partition info (to fixup) */ + *cmd++ = virt2gpu(partition2, &drawctxt->gpustate); + + + /* store the vertex shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ + /* TBD #1: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate); + + /* store the pixel shader instructions */ + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ + /* TBD #2: start/size (to restore) */ + *cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate); + + /* store the shared shader instructions if vertex base is nonzero */ + + *cmd++ = cp_type3_packet(CP_IM_STORE, 2); + *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ + /* TBD #3: start/size (to restore) */ + *cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate); + + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + /* create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, save, cmd); + + tmp_ctx.cmd = cmd; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + /* Allocate vmalloc memory to store the gpustate */ + result = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (result) + return result; + + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + /* Blank out h/w register, constant, and command buffer shadows. 
*/ + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + + /* set-up command and vertex buffer pointers */ + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + /* build indirect command buffers to save & restore regs/constants */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + build_regrestore_cmds(adreno_dev, drawctxt); + build_regsave_cmds(adreno_dev, drawctxt); + + build_shader_save_restore_cmds(drawctxt); + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int result; + + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, drawctxt->context_gmem_shadow.size); + + if (result) + return result; + + /* we've allocated the shadow, when swapped out, GMEM must be saved. */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + + /* blank out gmem shadow. */ + kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, + drawctxt->context_gmem_shadow.size); + + /* build quad vertex buffer */ + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* build TP0_CHICKEN register restore command buffer */ + tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); + + /* build indirect command buffers to save & restore gmem */ + /* Idle because we are reading PM override registers */ + adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); + drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + drawctxt->context_gmem_shadow.gmem_restore_commands = tmp_ctx.cmd; + tmp_ctx.cmd = + build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, + &drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->context_gmem_shadow.gmemshadow.gpuaddr, + drawctxt->context_gmem_shadow.gmemshadow.size, false); + + return 0; +} + +static void a2xx_ctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + KGSL_CTXT_INFO(device, + "active context flags %08x\n", context->flags); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* save shader partitioning and instructions. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->shader_save, 3); + + /* fixup shader partitioning parameter for + * SET_SHADER_BASES. + */ + adreno_ringbuffer_issuecmds(device, 0, + context->shader_fixup, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* save gmem. + * (note: changes shader. shader must already be saved.) 
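+	 * The gmem_save IB built by build_gmem2sys_cmds() loads its own
+	 * vertex/fragment programs, which is why the shader save above
+	 * must run first.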
+ */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_save, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default apgetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); + cmds[4] = (unsigned int) context; + adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + +#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->gpustate.gpuaddr, LCC_SHADOW_SIZE + + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false); +#endif + + /* restore gmem. + * (note: changes shader. shader must not already be restored.) + */ + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.gmem_restore, 3); + + /* Restore TP0_CHICKEN */ + adreno_ringbuffer_issuecmds(device, 0, + context->chicken_restore, 3); + + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + /* restore registers and constants. */ + adreno_ringbuffer_issuecmds(device, 0, + context->reg_restore, 3); + + /* restore shader instructions & partitioning. */ + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { + adreno_ringbuffer_issuecmds(device, 0, + context->shader_restore, 3); + } + + if (adreno_is_a20x(adreno_dev)) { + cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); + cmds[1] = context->bin_base_offset; + adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + } +} + +/* + * Interrupt management + * + * a2xx interrupt control is distributed among the various + * hardware components (RB, CP, MMU). The main interrupt + * tells us which component fired the interrupt, but one needs + * to go to the individual component to find out why. 
The + * following functions provide the broken out support for + * managing the interrupts + */ + +#define RBBM_INT_MASK RBBM_INT_CNTL__RDERR_INT_MASK + +#define CP_INT_MASK \ + (CP_INT_CNTL__T0_PACKET_IN_IB_MASK | \ + CP_INT_CNTL__OPCODE_ERROR_MASK | \ + CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK | \ + CP_INT_CNTL__RESERVED_BIT_ERROR_MASK | \ + CP_INT_CNTL__IB_ERROR_MASK | \ + CP_INT_CNTL__IB1_INT_MASK | \ + CP_INT_CNTL__RB_INT_MASK) + +#define VALID_STATUS_COUNT_MAX 10 + +static struct { + unsigned int mask; + const char *message; +} kgsl_cp_error_irqs[] = { + { CP_INT_CNTL__T0_PACKET_IN_IB_MASK, + "ringbuffer TO packet in IB interrupt" }, + { CP_INT_CNTL__OPCODE_ERROR_MASK, + "ringbuffer opcode error interrupt" }, + { CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK, + "ringbuffer protected mode error interrupt" }, + { CP_INT_CNTL__RESERVED_BIT_ERROR_MASK, + "ringbuffer reserved bit error interrupt" }, + { CP_INT_CNTL__IB_ERROR_MASK, + "ringbuffer IB error interrupt" }, +}; + +static void a2xx_cp_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0, num_reads = 0, master_status = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + int i; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &master_status); + while (!status && (num_reads < VALID_STATUS_COUNT_MAX) && + (master_status & MASTER_INT_SIGNAL__CP_INT_STAT)) { + adreno_regread(device, REG_CP_INT_STATUS, &status); + adreno_regread(device, REG_MASTER_INT_SIGNAL, + &master_status); + num_reads++; + } + if (num_reads > 1) + KGSL_DRV_WARN(device, + "Looped %d times to read REG_CP_INT_STATUS\n", + num_reads); + if (!status) { + if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) { + /* This indicates that we could not read CP_INT_STAT. + * As a precaution just wake up processes so + * they can check their timestamps. Since, we + * did not ack any interrupts this interrupt will + * be generated again */ + KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n"); + wake_up_interruptible_all(&device->wait_queue); + } else + KGSL_DRV_WARN(device, "Spurious interrput detected\n"); + return; + } + + if (status & CP_INT_CNTL__RB_INT_MASK) { + /* signal intr completion event */ + unsigned int enableflag = 0; + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + enableflag); + wmb(); + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + for (i = 0; i < ARRAY_SIZE(kgsl_cp_error_irqs); i++) { + if (status & kgsl_cp_error_irqs[i].mask) { + KGSL_CMD_CRIT(rb->device, "%s\n", + kgsl_cp_error_irqs[i].message); + /* + * on fatal errors, turn off the interrupts to + * avoid storming. 
This has the side effect of + * forcing a PM dump when the timestamp times out + */ + + kgsl_pwrctrl_irq(rb->device, KGSL_PWRFLAGS_OFF); + } + } + + /* only ack bits we understand */ + status &= CP_INT_MASK; + adreno_regwrite(device, REG_CP_INT_ACK, status); + + if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) { + KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n"); + queue_work(device->work_queue, &device->ts_expired_ws); + wake_up_interruptible_all(&device->wait_queue); + atomic_notifier_call_chain(&(device->ts_notifier_list), + device->id, + NULL); + } +} + +static void a2xx_rbbm_intrcallback(struct kgsl_device *device) +{ + unsigned int status = 0; + unsigned int rderr = 0; + + adreno_regread(device, REG_RBBM_INT_STATUS, &status); + + if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { + union rbbm_read_error_u rerr; + adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); + rerr.val = rderr; + if (rerr.f.read_address == REG_CP_INT_STATUS && + rerr.f.read_error && + rerr.f.read_requester) + KGSL_DRV_WARN(device, + "rbbm read error interrupt: %08x\n", rderr); + else + KGSL_DRV_CRIT(device, + "rbbm read error interrupt: %08x\n", rderr); + } + + status &= RBBM_INT_MASK; + adreno_regwrite(device, REG_RBBM_INT_ACK, status); +} + +irqreturn_t a2xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t result = IRQ_NONE; + unsigned int status; + + adreno_regread(device, REG_MASTER_INT_SIGNAL, &status); + + if (status & MASTER_INT_SIGNAL__MH_INT_STAT) { + kgsl_mh_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__CP_INT_STAT) { + a2xx_cp_intrcallback(device); + result = IRQ_HANDLED; + } + + if (status & MASTER_INT_SIGNAL__RBBM_INT_STAT) { + a2xx_rbbm_intrcallback(device); + result = IRQ_HANDLED; + } + + return result; +} + +static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) { + adreno_regwrite(device, REG_RBBM_INT_CNTL, RBBM_INT_MASK); + adreno_regwrite(device, REG_CP_INT_CNTL, CP_INT_MASK); + adreno_regwrite(device, MH_INTERRUPT_MASK, KGSL_MMU_INT_MASK); + } else { + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, MH_INTERRUPT_MASK, 0); + } + + /* Force the writes to post before touching the IRQ line */ + wmb(); +} + +struct adreno_gpudev adreno_a2xx_gpudev = { + .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, + .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, + .ctxt_save = a2xx_ctxt_save, + .ctxt_restore = a2xx_ctxt_restore, + .irq_handler = a2xx_irq_handler, + .irq_control = a2xx_irq_control, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c new file mode 100644 index 00000000..9e7ef61d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -0,0 +1,766 @@ +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +#include "adreno_ringbuffer.h" + +static ssize_t +sysfs_show_ptpool_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->entries); +} + +static ssize_t +sysfs_show_ptpool_min(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", + pool->static_entries); +} + +static ssize_t +sysfs_show_ptpool_chunks(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->chunks); +} + +static ssize_t +sysfs_show_ptpool_ptsize(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *) + kgsl_driver.ptpool; + return snprintf(buf, PAGE_SIZE, "%d\n", pool->ptsize); +} + +static struct kobj_attribute attr_ptpool_entries = { + .attr = { .name = "ptpool_entries", .mode = 0444 }, + .show = sysfs_show_ptpool_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_min = { + .attr = { .name = "ptpool_min", .mode = 0444 }, + .show = sysfs_show_ptpool_min, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_chunks = { + .attr = { .name = "ptpool_chunks", .mode = 0444 }, + .show = sysfs_show_ptpool_chunks, + .store = NULL, +}; + +static struct kobj_attribute attr_ptpool_ptsize = { + .attr = { .name = "ptpool_ptsize", .mode = 0444 }, + .show = sysfs_show_ptpool_ptsize, + .store = NULL, +}; + +static struct attribute *ptpool_attrs[] = { + &attr_ptpool_entries.attr, + &attr_ptpool_min.attr, + &attr_ptpool_chunks.attr, + &attr_ptpool_ptsize.attr, + NULL, +}; + +static struct attribute_group ptpool_attr_group = { + .attrs = ptpool_attrs, +}; + +static int +_kgsl_ptpool_add_entries(struct kgsl_ptpool *pool, int count, int dynamic) +{ + struct kgsl_ptpool_chunk *chunk; + size_t size = ALIGN(count * pool->ptsize, PAGE_SIZE); + + BUG_ON(count == 0); + + if (get_order(size) >= MAX_ORDER) { + KGSL_CORE_ERR("ptpool allocation is too big: %d\n", size); + return -EINVAL; + } + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (chunk == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*chunk)); + return -ENOMEM; + } + + chunk->size = size; + chunk->count = count; + chunk->dynamic = dynamic; + + chunk->data = dma_alloc_coherent(NULL, size, + &chunk->phys, GFP_KERNEL); + + if (chunk->data == NULL) { + KGSL_CORE_ERR("dma_alloc_coherent(%d) failed\n", size); + goto err; + } + + chunk->bitmap = kzalloc(BITS_TO_LONGS(count) * 4, GFP_KERNEL); + + if (chunk->bitmap == NULL) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + BITS_TO_LONGS(count) * 4); + goto err_dma; + } + + list_add_tail(&chunk->list, &pool->list); + + pool->chunks++; + pool->entries += count; + + if (!dynamic) + pool->static_entries += count; + + return 0; + +err_dma: + dma_free_coherent(NULL, chunk->size, chunk->data, chunk->phys); +err: + kfree(chunk); + return -ENOMEM; +} + +static void * +_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr) +{ + struct kgsl_ptpool_chunk *chunk; + + list_for_each_entry(chunk, &pool->list, list) { + int bit = find_first_zero_bit(chunk->bitmap, 
chunk->count); + + if (bit >= chunk->count) + continue; + + set_bit(bit, chunk->bitmap); + *physaddr = chunk->phys + (bit * pool->ptsize); + + return chunk->data + (bit * pool->ptsize); + } + + return NULL; +} + +/** + * kgsl_ptpool_add + * @pool: A pointer to a ptpool structure + * @entries: Number of entries to add + * + * Add static entries to the pagetable pool. + */ + +static int +kgsl_ptpool_add(struct kgsl_ptpool *pool, int count) +{ + int ret = 0; + BUG_ON(count == 0); + + mutex_lock(&pool->lock); + + /* Only 4MB can be allocated in one chunk, so larger allocations + need to be split into multiple sections */ + + while (count) { + int entries = ((count * pool->ptsize) > SZ_4M) ? + SZ_4M / pool->ptsize : count; + + /* Add the entries as static, i.e. they don't ever stand + a chance of being removed */ + + ret = _kgsl_ptpool_add_entries(pool, entries, 0); + if (ret) + break; + + count -= entries; + } + + mutex_unlock(&pool->lock); + return ret; +} + +/** + * kgsl_ptpool_alloc + * @pool: A pointer to a ptpool structure + * @addr: A pointer to store the physical address of the chunk + * + * Allocate a pagetable from the pool. Returns the virtual address + * of the pagetable, the physical address is returned in physaddr + */ + +static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool, + unsigned int *physaddr) +{ + void *addr = NULL; + int ret; + + mutex_lock(&pool->lock); + addr = _kgsl_ptpool_get_entry(pool, physaddr); + if (addr) + goto done; + + /* Add a chunk for 1 more pagetable and mark it as dynamic */ + ret = _kgsl_ptpool_add_entries(pool, 1, 1); + + if (ret) + goto done; + + addr = _kgsl_ptpool_get_entry(pool, physaddr); +done: + mutex_unlock(&pool->lock); + return addr; +} + +static inline void _kgsl_ptpool_rm_chunk(struct kgsl_ptpool_chunk *chunk) +{ + list_del(&chunk->list); + + if (chunk->data) + dma_free_coherent(NULL, chunk->size, chunk->data, + chunk->phys); + kfree(chunk->bitmap); + kfree(chunk); +} + +/** + * kgsl_ptpool_free + * @pool: A pointer to a ptpool structure + * @addr: A pointer to the virtual address to free + * + * Free a pagetable allocated from the pool + */ + +static void kgsl_ptpool_free(struct kgsl_ptpool *pool, void *addr) +{ + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL || addr == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) { + if (addr >= chunk->data && + addr < chunk->data + chunk->size) { + int bit = ((unsigned long) (addr - chunk->data)) / + pool->ptsize; + + clear_bit(bit, chunk->bitmap); + memset(addr, 0, pool->ptsize); + + if (chunk->dynamic && + bitmap_empty(chunk->bitmap, chunk->count)) + _kgsl_ptpool_rm_chunk(chunk); + + break; + } + } + + mutex_unlock(&pool->lock); +} + +void kgsl_gpummu_ptpool_destroy(void *ptpool) +{ + struct kgsl_ptpool *pool = (struct kgsl_ptpool *)ptpool; + struct kgsl_ptpool_chunk *chunk, *tmp; + + if (pool == NULL) + return; + + mutex_lock(&pool->lock); + list_for_each_entry_safe(chunk, tmp, &pool->list, list) + _kgsl_ptpool_rm_chunk(chunk); + mutex_unlock(&pool->lock); + + kfree(pool); +} + +/** + * kgsl_ptpool_init + * @pool: A pointer to a ptpool structure to initialize + * @ptsize: The size of each pagetable entry + * @entries: The number of inital entries to add to the pool + * + * Initalize a pool and allocate an initial chunk of entries. 
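+ *
+ * Returns the new pool, or NULL on failure.  A caller would typically do
+ * something like (the entry count here is only illustrative):
+ *
+ *	kgsl_driver.ptpool = kgsl_gpummu_ptpool_init(KGSL_PAGETABLE_SIZE, 16);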
+ *
+ */
+void *kgsl_gpummu_ptpool_init(int ptsize, int entries)
+{
+	struct kgsl_ptpool *pool;
+	int ret = 0;
+	BUG_ON(ptsize == 0);
+
+	pool = kzalloc(sizeof(struct kgsl_ptpool), GFP_KERNEL);
+	if (!pool) {
+		KGSL_CORE_ERR("Failed to allocate memory "
+				"for ptpool\n");
+		return NULL;
+	}
+
+	pool->ptsize = ptsize;
+	mutex_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->list);
+
+	if (entries) {
+		ret = kgsl_ptpool_add(pool, entries);
+		if (ret)
+			goto err_ptpool_remove;
+	}
+
+	ret = sysfs_create_group(kgsl_driver.ptkobj, &ptpool_attr_group);
+	if (ret) {
+		KGSL_CORE_ERR("sysfs_create_group failed for ptpool "
+				"statistics: %d\n", ret);
+		goto err_ptpool_remove;
+	}
+	return (void *)pool;
+
+err_ptpool_remove:
+	kgsl_gpummu_ptpool_destroy(pool);
+	return NULL;
+}
+
+int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt,
+					unsigned int pt_base)
+{
+	struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
+	return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base);
+}
+
+void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt)
+{
+	struct kgsl_gpummu_pt *gpummu_pt = (struct kgsl_gpummu_pt *)
+						mmu_specific_pt;
+	kgsl_ptpool_free((struct kgsl_ptpool *)kgsl_driver.ptpool,
+				gpummu_pt->base.hostptr);
+
+	kgsl_driver.stats.coherent -= KGSL_PAGETABLE_SIZE;
+
+	kfree(gpummu_pt->tlbflushfilter.base);
+
+	kfree(gpummu_pt);
+}
+
+static inline uint32_t
+kgsl_pt_entry_get(unsigned int va_base, uint32_t va)
+{
+	return (va - va_base) >> PAGE_SHIFT;
+}
+
+static inline void
+kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val)
+{
+	uint32_t *baseptr = (uint32_t *)pt->base.hostptr;
+
+	writel_relaxed(val, &baseptr[pte]);
+}
+
+static inline uint32_t
+kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte)
+{
+	uint32_t *baseptr = (uint32_t *)pt->base.hostptr;
+	return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK;
+}
+
+static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt,
+					enum kgsl_deviceid id)
+{
+	unsigned int result = 0;
+	struct kgsl_gpummu_pt *gpummu_pt;
+
+	if (pt == NULL)
+		return 0;
+	gpummu_pt = pt->priv;
+
+	spin_lock(&pt->lock);
+	if (gpummu_pt->tlb_flags && (1<<id)) {
+		result = KGSL_MMUFLAGS_TLBFLUSH;
+		gpummu_pt->tlb_flags &= ~(1<<id);
+	}
+	spin_unlock(&pt->lock);
+	return result;
+}
+
+static void kgsl_gpummu_pagefault(struct kgsl_device *device)
+{
+	unsigned int reg;
+	unsigned int ptbase;
+
+	kgsl_regread(device, MH_MMU_PAGE_FAULT, &reg);
+	kgsl_regread(device, MH_MMU_PT_BASE, &ptbase);
+
+	KGSL_MEM_CRIT(device,
+			"mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n",
+			reg & ~(PAGE_SIZE - 1),
+			kgsl_mmu_get_ptname_from_ptbase(ptbase),
+			reg & 0x02 ? "WRITE" : "READ",
+			(reg >> 4) & 0xF);
+}
+
+static void *kgsl_gpummu_create_pagetable(void)
+{
+	struct kgsl_gpummu_pt *gpummu_pt;
+
+	gpummu_pt = kzalloc(sizeof(struct kgsl_gpummu_pt),
+			GFP_KERNEL);
+	if (!gpummu_pt)
+		return NULL;
+
+	gpummu_pt->tlb_flags = 0;
+	gpummu_pt->last_superpte = 0;
+
+	gpummu_pt->tlbflushfilter.size = (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE /
+				(PAGE_SIZE * GSL_PT_SUPER_PTE * 8)) + 1;
+	gpummu_pt->tlbflushfilter.base = (unsigned int *)
+			kzalloc(gpummu_pt->tlbflushfilter.size, GFP_KERNEL);
+	if (!gpummu_pt->tlbflushfilter.base) {
+		KGSL_CORE_ERR("kzalloc(%d) failed\n",
+			gpummu_pt->tlbflushfilter.size);
+		goto err_free_gpummu;
+	}
+	GSL_TLBFLUSH_FILTER_RESET();
+
+	gpummu_pt->base.hostptr = kgsl_ptpool_alloc((struct kgsl_ptpool *)
+						kgsl_driver.ptpool,
+						&gpummu_pt->base.physaddr);
+
+	if (gpummu_pt->base.hostptr == NULL)
+		goto err_flushfilter;
+
+	/* ptpool allocations are from coherent memory, so update the
+	   device statistics accordingly */
+
+	KGSL_STATS_ADD(KGSL_PAGETABLE_SIZE, kgsl_driver.stats.coherent,
+		       kgsl_driver.stats.coherent_max);
+
+	gpummu_pt->base.gpuaddr = gpummu_pt->base.physaddr;
+	gpummu_pt->base.size = KGSL_PAGETABLE_SIZE;
+
+	return (void *)gpummu_pt;
+
+err_flushfilter:
+	kfree(gpummu_pt->tlbflushfilter.base);
+err_free_gpummu:
+	kfree(gpummu_pt);
+
+	return NULL;
+}
+
+static void kgsl_gpummu_default_setstate(struct kgsl_device *device,
+					uint32_t flags)
+{
+	struct kgsl_gpummu_pt *gpummu_pt;
+	if (!kgsl_mmu_enabled())
+		return;
+
+	if (flags & KGSL_MMUFLAGS_PTUPDATE) {
+		kgsl_idle(device, KGSL_TIMEOUT_DEFAULT);
+		gpummu_pt = device->mmu.hwpagetable->priv;
+		kgsl_regwrite(device, MH_MMU_PT_BASE,
+			gpummu_pt->base.gpuaddr);
+	}
+
+	if (flags & KGSL_MMUFLAGS_TLBFLUSH) {
+		/* Invalidate all and tc */
+		kgsl_regwrite(device, MH_MMU_INVALIDATE, 0x00000003);
+	}
+}
+
+static void kgsl_gpummu_setstate(struct kgsl_device *device,
+				struct kgsl_pagetable *pagetable)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+	struct kgsl_gpummu_pt *gpummu_pt;
+
+	if (mmu->flags & KGSL_FLAGS_STARTED) {
+		/* page table not current, then setup mmu to use new
+		 * specified page table
+		 */
+		if (mmu->hwpagetable != pagetable) {
+			mmu->hwpagetable = pagetable;
+			spin_lock(&mmu->hwpagetable->lock);
+			gpummu_pt = mmu->hwpagetable->priv;
+			gpummu_pt->tlb_flags &= ~(1<<device->id);
+			spin_unlock(&mmu->hwpagetable->lock);
+
+			/* call device specific set page table */
+			kgsl_setstate(mmu->device, KGSL_MMUFLAGS_TLBFLUSH |
+				KGSL_MMUFLAGS_PTUPDATE);
+		}
+	}
+}
+
+static int kgsl_gpummu_init(struct kgsl_device *device)
+{
+	/*
+	 * initialize device mmu
+	 *
+	 * call this with the global lock held
+	 */
+	int status = 0;
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	mmu->device = device;
+
+	/* sub-client MMU lookups require address translation */
+	if ((mmu->config & ~0x1) > 0) {
+		/*make sure virtual address range is a multiple of 64Kb */
+		if (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE & ((1 << 16) - 1)) {
+			KGSL_CORE_ERR("Invalid pagetable size requested "
+			"for GPUMMU: %x\n", CONFIG_MSM_KGSL_PAGE_TABLE_SIZE);
+			return -EINVAL;
+		}
+
+		/* allocate memory used for completing r/w operations that
+		 * cannot be mapped by the MMU
+		 */
+		status = kgsl_allocate_contiguous(&mmu->setstate_memory, 64);
+		if (!status)
+			kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0,
+					   mmu->setstate_memory.size);
+	}
+
+	dev_info(device->dev, "|%s| MMU type set for device is GPUMMU\n",
+			__func__);
+	return status;
+}
+
+static int kgsl_gpummu_start(struct kgsl_device *device)
+{
+	/*
+	 * initialize device mmu
+	 *
+	 * call this with 
the global lock held + */ + + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_gpummu_pt *gpummu_pt; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + /* MMU not enabled */ + if ((mmu->config & 0x1) == 0) + return 0; + + /* setup MMU and sub-client behavior */ + kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config); + + /* idle device */ + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + + /* enable axi interrupts */ + kgsl_regwrite(device, MH_INTERRUPT_MASK, + GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT); + + kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + + /* TRAN_ERROR needs a 32 byte (32 byte aligned) chunk of memory + * to complete transactions in case of an MMU fault. Note that + * we'll leave the bottom 32 bytes of the setstate_memory for other + * purposes (e.g. use it when dummy read cycles are needed + * for other blocks) */ + kgsl_regwrite(device, MH_MMU_TRAN_ERROR, + mmu->setstate_memory.physaddr + 32); + + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + + mmu->hwpagetable = mmu->defaultpagetable; + gpummu_pt = mmu->hwpagetable->priv; + kgsl_regwrite(device, MH_MMU_PT_BASE, + gpummu_pt->base.gpuaddr); + kgsl_regwrite(device, MH_MMU_VA_RANGE, + (KGSL_PAGETABLE_BASE | + (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE >> 16))); + kgsl_setstate(device, KGSL_MMUFLAGS_TLBFLUSH); + mmu->flags |= KGSL_FLAGS_STARTED; + + return 0; +} + +static int +kgsl_gpummu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + unsigned int numpages; + unsigned int pte, ptefirst, ptelast, superpte; + unsigned int range = memdesc->size; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + numpages = (range >> PAGE_SHIFT); + if (range & (PAGE_SIZE - 1)) + numpages++; + + ptefirst = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, gpuaddr); + ptelast = ptefirst + numpages; + + superpte = ptefirst - (ptefirst & (GSL_PT_SUPER_PTE-1)); + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / GSL_PT_SUPER_PTE); + for (pte = ptefirst; pte < ptelast; pte++) { +#ifdef VERBOSE_DEBUG + /* check if PTE exists */ + if (!kgsl_pt_map_get(gpummu_pt, pte)) + KGSL_CORE_ERR("pt entry %x is already " + "unmapped for pagetable %p\n", pte, gpummu_pt); +#endif + kgsl_pt_map_set(gpummu_pt, pte, GSL_PT_PAGE_DIRTY); + superpte = pte - (pte & (GSL_PT_SUPER_PTE - 1)); + if (pte == superpte) + GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / + GSL_PT_SUPER_PTE); + } + + /* Post all writes to the pagetable */ + wmb(); + + return 0; +} + +#define SUPERPTE_IS_DIRTY(_p) \ +(((_p) & (GSL_PT_SUPER_PTE - 1)) == 0 && \ +GSL_TLBFLUSH_FILTER_ISDIRTY((_p) / GSL_PT_SUPER_PTE)) + +static int +kgsl_gpummu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + unsigned int pte; + struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; + struct scatterlist *s; + int flushtlb = 0; + int i; + + pte = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, memdesc->gpuaddr); + + /* Flush the TLB if the first PTE isn't at the superpte boundary */ + if (pte & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + for_each_sg(memdesc->sg, s, memdesc->sglen, i) { + unsigned int paddr = sg_phys(s); + unsigned int j; + + /* Each 
sg entry might be multiple pages long */ + for (j = paddr; j < paddr + s->length; pte++, j += PAGE_SIZE) { + if (SUPERPTE_IS_DIRTY(pte)) + flushtlb = 1; + kgsl_pt_map_set(gpummu_pt, pte, j | protflags); + } + } + + /* Flush the TLB if the last PTE isn't at the superpte boundary */ + if ((pte + 1) & (GSL_PT_SUPER_PTE - 1)) + flushtlb = 1; + + wmb(); + + if (flushtlb) { + /*set all devices as needing flushing*/ + gpummu_pt->tlb_flags = UINT_MAX; + GSL_TLBFLUSH_FILTER_RESET(); + } + + return 0; +} + +static int kgsl_gpummu_stop(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + mmu->flags &= ~KGSL_FLAGS_STARTED; + + return 0; +} + +static int kgsl_gpummu_close(struct kgsl_device *device) +{ + /* + * close device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->setstate_memory.gpuaddr) + kgsl_sharedmem_free(&mmu->setstate_memory); + + if (mmu->defaultpagetable) + kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_gpummu_get_current_ptbase(struct kgsl_device *device) +{ + unsigned int ptbase; + kgsl_regread(device, MH_MMU_PT_BASE, &ptbase); + return ptbase; +} + +struct kgsl_mmu_ops gpummu_ops = { + .mmu_init = kgsl_gpummu_init, + .mmu_close = kgsl_gpummu_close, + .mmu_start = kgsl_gpummu_start, + .mmu_stop = kgsl_gpummu_stop, + .mmu_setstate = kgsl_gpummu_setstate, + .mmu_device_setstate = kgsl_gpummu_default_setstate, + .mmu_pagefault = kgsl_gpummu_pagefault, + .mmu_get_current_ptbase = kgsl_gpummu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops gpummu_pt_ops = { + .mmu_map = kgsl_gpummu_map, + .mmu_unmap = kgsl_gpummu_unmap, + .mmu_create_pagetable = kgsl_gpummu_create_pagetable, + .mmu_destroy_pagetable = kgsl_gpummu_destroy_pagetable, + .mmu_pt_equal = kgsl_gpummu_pt_equal, + .mmu_pt_get_flags = kgsl_gpummu_pt_get_flags, +}; diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h new file mode 100644 index 00000000..46466a8d --- /dev/null +++ b/drivers/gpu/msm/kgsl_gpummu.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
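The map path above reduces to simple index arithmetic: the PTE slot is the page offset of the GPU virtual address from the pagetable base, and each slot receives the physical page address OR'd with the protection bits. A small stand-alone model of that arithmetic (the base address, flag values and names are invented for the sketch):

/* Illustration only: GPU VA -> PTE index -> entry, modelled in user space */
#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SHIFT 12
#define TOY_PAGE_SIZE  (1u << TOY_PAGE_SHIFT)
#define TOY_VA_BASE    0x66000000u   /* stand-in for KGSL_PAGETABLE_BASE */
#define TOY_PROT_RW    0x3u          /* stand-in protection flags */

static uint32_t toy_pt[1024];        /* one 32-bit entry per page */

/* like kgsl_pt_entry_get(): VA offset from the pagetable base, in pages */
static uint32_t pte_index(uint32_t va)
{
	return (va - TOY_VA_BASE) >> TOY_PAGE_SHIFT;
}

/* like the inner loop of kgsl_gpummu_map(): one entry per physical page */
static void toy_map(uint32_t gpuaddr, uint32_t paddr, uint32_t size)
{
	uint32_t pte = pte_index(gpuaddr);
	uint32_t end = paddr + size;

	for (; paddr < end; paddr += TOY_PAGE_SIZE, pte++)
		toy_pt[pte] = paddr | TOY_PROT_RW;
}

int main(void)
{
	toy_map(TOY_VA_BASE + 0x5000, 0x80000000u, 3 * TOY_PAGE_SIZE);
	printf("pte[5] = 0x%08x\n", (unsigned)toy_pt[5]);  /* 0x80000003 */
	return 0;
}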
+ * + */ + +#ifndef __KGSL_GPUMMU_H +#define __KGSL_GPUMMU_H + +#define GSL_PT_PAGE_BITS_MASK 0x00000007 +#define GSL_PT_PAGE_ADDR_MASK PAGE_MASK + +#define GSL_MMU_INT_MASK \ + (MH_INTERRUPT_MASK__AXI_READ_ERROR | \ + MH_INTERRUPT_MASK__AXI_WRITE_ERROR) + +/* Macros to manage TLB flushing */ +#define GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS (sizeof(unsigned char) * 8) +#define GSL_TLBFLUSH_FILTER_GET(superpte) \ + (*((unsigned char *) \ + (((unsigned int)gpummu_pt->tlbflushfilter.base) \ + + (superpte / GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)))) +#define GSL_TLBFLUSH_FILTER_SETDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) |= 1 << \ + (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS)) +#define GSL_TLBFLUSH_FILTER_ISDIRTY(superpte) \ + (GSL_TLBFLUSH_FILTER_GET((superpte)) & \ + (1 << (superpte % GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS))) +#define GSL_TLBFLUSH_FILTER_RESET() memset(gpummu_pt->tlbflushfilter.base,\ + 0, gpummu_pt->tlbflushfilter.size) + +extern struct kgsl_mmu_ops gpummu_ops; +extern struct kgsl_mmu_pt_ops gpummu_pt_ops; + +struct kgsl_tlbflushfilter { + unsigned int *base; + unsigned int size; +}; + +struct kgsl_gpummu_pt { + struct kgsl_memdesc base; + unsigned int last_superpte; + unsigned int tlb_flags; + /* Maintain filter to manage tlb flushing */ + struct kgsl_tlbflushfilter tlbflushfilter; +}; + +struct kgsl_ptpool_chunk { + size_t size; + unsigned int count; + int dynamic; + + void *data; + unsigned int phys; + + unsigned long *bitmap; + struct list_head list; +}; + +struct kgsl_ptpool { + size_t ptsize; + struct mutex lock; + struct list_head list; + int entries; + int static_entries; + int chunks; +}; + +void *kgsl_gpummu_ptpool_init(int ptsize, + int entries); +void kgsl_gpummu_ptpool_destroy(void *ptpool); + +static inline unsigned int kgsl_pt_get_base_addr(struct kgsl_pagetable *pt) +{ + struct kgsl_gpummu_pt *gpummu_pt = pt->priv; + return gpummu_pt->base.gpuaddr; +} +#endif /* __KGSL_GPUMMU_H */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c new file mode 100644 index 00000000..30365a3c --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -0,0 +1,333 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
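The GSL_TLBFLUSH_FILTER_* macros above keep one dirty bit per superpte group in the per-pagetable tlbflushfilter buffer, so the map path can skip a TLB invalidate when no previously-unmapped group is being reused. A user-space model of that bookkeeping (function names and sizes are invented for the sketch):

/* Illustration only: dirty-bit filter over superpte groups */
#include <stdio.h>
#include <string.h>

#define SUPER_PTE     8      /* pages per superpte group (GSL_PT_SUPER_PTE) */
#define FILTER_BYTES  16     /* covers 16 * 8 superpte groups */

static unsigned char filter[FILTER_BYTES];

static void filter_set(unsigned int superpte)
{
	unsigned int group = superpte / SUPER_PTE;

	filter[group / 8] |= 1u << (group % 8);       /* SETDIRTY */
}

static int filter_test(unsigned int superpte)
{
	unsigned int group = superpte / SUPER_PTE;

	return filter[group / 8] & (1u << (group % 8)); /* ISDIRTY */
}

int main(void)
{
	filter_set(24);                     /* unmap dirties group 24/8 = 3 */
	printf("superpte 24 dirty? %d\n", filter_test(24) ? 1 : 0);  /* 1 */
	printf("superpte 64 dirty? %d\n", filter_test(64) ? 1 : 0);  /* 0 */
	memset(filter, 0, sizeof(filter));  /* like GSL_TLBFLUSH_FILTER_RESET() */
	return 0;
}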
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" + +struct kgsl_iommu { + struct device *iommu_user_dev; + int iommu_user_dev_attached; + struct device *iommu_priv_dev; + int iommu_priv_dev_attached; +}; + +static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, + unsigned int pt_base) +{ + struct iommu_domain *domain = pt->priv; + return pt && pt_base && ((unsigned int)domain == pt_base); +} + +static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) +{ + struct iommu_domain *domain = mmu_specific_pt; + if (domain) + iommu_domain_free(domain); +} + +void *kgsl_iommu_create_pagetable(void) +{ + struct iommu_domain *domain = iommu_domain_alloc(0); + if (!domain) + KGSL_CORE_ERR("Failed to create iommu domain\n"); + + return domain; +} + +static void kgsl_detach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev_attached) { + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev_attached) { + iommu_detach_device(domain, iommu->iommu_priv_dev); + iommu->iommu_priv_dev_attached = 0; + KGSL_MEM_INFO(mmu->device, + "iommu %p detached from priv dev of MMU: %p\n", + domain, mmu); + } +} + +static int kgsl_attach_pagetable_iommu_domain(struct kgsl_mmu *mmu) +{ + struct iommu_domain *domain; + int ret = 0; + struct kgsl_iommu *iommu = mmu->priv; + + BUG_ON(mmu->hwpagetable == NULL); + BUG_ON(mmu->hwpagetable->priv == NULL); + + domain = mmu->hwpagetable->priv; + + if (iommu->iommu_user_dev && !iommu->iommu_user_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_user_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + goto done; + } + iommu->iommu_user_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to user dev of MMU: %p\n", + domain, mmu); + } + if (iommu->iommu_priv_dev && !iommu->iommu_priv_dev_attached) { + ret = iommu_attach_device(domain, iommu->iommu_priv_dev); + if (ret) { + KGSL_MEM_ERR(mmu->device, + "Failed to attach device, err %d\n", ret); + iommu_detach_device(domain, iommu->iommu_user_dev); + iommu->iommu_user_dev_attached = 0; + goto done; + } + iommu->iommu_priv_dev_attached = 1; + KGSL_MEM_INFO(mmu->device, + "iommu %p attached to priv dev of MMU: %p\n", + domain, mmu); + } +done: + return ret; +} + +static int kgsl_get_iommu_ctxt(struct kgsl_iommu *iommu, + struct kgsl_device *device) +{ + int status = 0; + struct platform_device *pdev = + container_of(device->parentdev, struct platform_device, dev); + struct kgsl_device_platform_data *pdata_dev = pdev->dev.platform_data; + if (pdata_dev->iommu_user_ctx_name) + iommu->iommu_user_dev = msm_iommu_get_ctx( + pdata_dev->iommu_user_ctx_name); + if (pdata_dev->iommu_priv_ctx_name) + iommu->iommu_priv_dev = msm_iommu_get_ctx( + pdata_dev->iommu_priv_ctx_name); + if (!iommu->iommu_user_dev) { + KGSL_CORE_ERR("Failed to get user iommu dev handle for " + "device %s\n", + pdata_dev->iommu_user_ctx_name); + status = -EINVAL; + } + return status; +} + +static void kgsl_iommu_setstate(struct kgsl_device *device, + struct kgsl_pagetable *pagetable) +{ + 
struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* page table not current, then setup mmu to use new + * specified page table + */ + if (mmu->hwpagetable != pagetable) { + kgsl_idle(device, KGSL_TIMEOUT_DEFAULT); + kgsl_detach_pagetable_iommu_domain(mmu); + mmu->hwpagetable = pagetable; + if (mmu->hwpagetable) + kgsl_attach_pagetable_iommu_domain(mmu); + } + } +} + +static int kgsl_iommu_init(struct kgsl_device *device) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + struct kgsl_iommu *iommu; + + mmu->device = device; + + iommu = kzalloc(sizeof(struct kgsl_iommu), GFP_KERNEL); + if (!iommu) { + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(struct kgsl_iommu)); + return -ENOMEM; + } + + iommu->iommu_priv_dev_attached = 0; + iommu->iommu_user_dev_attached = 0; + status = kgsl_get_iommu_ctxt(iommu, device); + if (status) { + kfree(iommu); + iommu = NULL; + } + mmu->priv = iommu; + + dev_info(device->dev, "|%s| MMU type set for device is IOMMU\n", + __func__); + return status; +} + +static int kgsl_iommu_start(struct kgsl_device *device) +{ + int status; + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) + return 0; + + kgsl_regwrite(device, MH_MMU_CONFIG, 0x00000000); + if (mmu->defaultpagetable == NULL) + mmu->defaultpagetable = + kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + /* Return error if the default pagetable doesn't exist */ + if (mmu->defaultpagetable == NULL) + return -ENOMEM; + mmu->hwpagetable = mmu->defaultpagetable; + + status = kgsl_attach_pagetable_iommu_domain(mmu); + if (!status) + mmu->flags |= KGSL_FLAGS_STARTED; + + return status; +} + +static int +kgsl_iommu_unmap(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc) +{ + int ret; + unsigned int range = memdesc->size; + struct iommu_domain *domain = (struct iommu_domain *) + mmu_specific_pt; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK; + + if (range == 0 || gpuaddr == 0) + return 0; + + ret = iommu_unmap_range(domain, gpuaddr, range); + if (ret) + KGSL_CORE_ERR("iommu_unmap_range(%p, %x, %d) failed " + "with err: %d\n", domain, gpuaddr, + range, ret); + + return 0; +} + +static int +kgsl_iommu_map(void *mmu_specific_pt, + struct kgsl_memdesc *memdesc, + unsigned int protflags) +{ + int ret; + unsigned int iommu_virt_addr; + struct iommu_domain *domain = mmu_specific_pt; + + BUG_ON(NULL == domain); + + + iommu_virt_addr = memdesc->gpuaddr; + + ret = iommu_map_range(domain, iommu_virt_addr, memdesc->sg, + memdesc->size, MSM_IOMMU_ATTR_NONCACHED); + if (ret) { + KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) " + "failed with err: %d\n", domain, + iommu_virt_addr, memdesc->sg, memdesc->size, + MSM_IOMMU_ATTR_NONCACHED, ret); + return ret; + } + + return ret; +} + +static int kgsl_iommu_stop(struct kgsl_device *device) +{ + /* + * stop device mmu + * + * call this with the global lock held + */ + struct kgsl_mmu *mmu = &device->mmu; + + if (mmu->flags & KGSL_FLAGS_STARTED) { + /* detach iommu attachment */ + kgsl_detach_pagetable_iommu_domain(mmu); + + mmu->flags &= ~KGSL_FLAGS_STARTED; + } + + return 0; +} + +static int kgsl_iommu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + if (mmu->defaultpagetable) + 
kgsl_mmu_putpagetable(mmu->defaultpagetable); + + return 0; +} + +static unsigned int +kgsl_iommu_get_current_ptbase(struct kgsl_device *device) +{ + /* Current base is always the hwpagetables domain as we + * do not use per process pagetables right not for iommu. + * This will change when we switch to per process pagetables. + */ + return (unsigned int)device->mmu.hwpagetable->priv; +} + +struct kgsl_mmu_ops iommu_ops = { + .mmu_init = kgsl_iommu_init, + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_stop = kgsl_iommu_stop, + .mmu_setstate = kgsl_iommu_setstate, + .mmu_device_setstate = NULL, + .mmu_pagefault = NULL, + .mmu_get_current_ptbase = kgsl_iommu_get_current_ptbase, +}; + +struct kgsl_mmu_pt_ops iommu_pt_ops = { + .mmu_map = kgsl_iommu_map, + .mmu_unmap = kgsl_iommu_unmap, + .mmu_create_pagetable = kgsl_iommu_create_pagetable, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .mmu_pt_equal = kgsl_iommu_pt_equal, + .mmu_pt_get_flags = NULL, +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_idlestats.c b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c new file mode 100644 index 00000000..d5fa84ed --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c @@ -0,0 +1,221 @@ +/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define MAX_CORES 4 +struct _cpu_info { + spinlock_t lock; + struct notifier_block cpu_nb; + u64 start[MAX_CORES]; + u64 end[MAX_CORES]; + int curr_freq[MAX_CORES]; + int max_freq[MAX_CORES]; +}; + +struct idlestats_priv { + char name[32]; + struct msm_idle_stats_device idledev; + struct kgsl_device *device; + struct msm_idle_pulse pulse; + struct _cpu_info cpu_info; +}; + +static int idlestats_cpufreq_notifier( + struct notifier_block *nb, + unsigned long val, void *data) +{ + struct _cpu_info *cpu = container_of(nb, + struct _cpu_info, cpu_nb); + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + spin_lock(&cpu->lock); + if (freq->cpu < num_possible_cpus()) + cpu->curr_freq[freq->cpu] = freq->new / 1000; + spin_unlock(&cpu->lock); + + return 0; +} + +static void idlestats_get_sample(struct msm_idle_stats_device *idledev, + struct msm_idle_pulse *pulse) +{ + struct kgsl_power_stats stats; + struct idlestats_priv *priv = container_of(idledev, + struct idlestats_priv, idledev); + struct kgsl_device *device = priv->device; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + mutex_lock(&device->mutex); + /* If the GPU is asleep, don't wake it up - assume that we + are idle */ + + if (!(device->state & (KGSL_STATE_SLEEP | KGSL_STATE_NAP))) { + device->ftbl->power_stats(device, &stats); + pulse->busy_start_time = pwr->time - stats.busy_time; + pulse->busy_interval = stats.busy_time; + } else { + pulse->busy_start_time = pwr->time; + pulse->busy_interval = 0; + } + pulse->wait_interval = 0; + mutex_unlock(&device->mutex); +} + +static void idlestats_busy(struct kgsl_device *device, + 
struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + int i, busy, nr_cpu = 1; + + if (priv->pulse.busy_start_time != 0) { + priv->pulse.wait_interval = 0; + /* Calculate the total CPU busy time for this GPU pulse */ + for (i = 0; i < num_possible_cpus(); i++) { + spin_lock(&priv->cpu_info.lock); + if (cpu_online(i)) { + priv->cpu_info.end[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + busy = priv->cpu_info.end[i] - + priv->cpu_info.start[i]; + /* Normalize the busy time by frequency */ + busy = priv->cpu_info.curr_freq[i] * + (busy / priv->cpu_info.max_freq[i]); + priv->pulse.wait_interval += busy; + nr_cpu++; + } + spin_unlock(&priv->cpu_info.lock); + } + priv->pulse.wait_interval /= nr_cpu; + msm_idle_stats_idle_end(&priv->idledev, &priv->pulse); + } + priv->pulse.busy_start_time = ktime_to_us(ktime_get()); +} + +static void idlestats_idle(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + int i, nr_cpu; + struct kgsl_power_stats stats; + struct idlestats_priv *priv = pwrscale->priv; + + /* This is called from within a mutex protected function, so + no additional locking required */ + device->ftbl->power_stats(device, &stats); + + /* If total_time is zero, then we don't have + any interesting statistics to store */ + if (stats.total_time == 0) { + priv->pulse.busy_start_time = 0; + return; + } + + priv->pulse.busy_interval = stats.busy_time; + nr_cpu = num_possible_cpus(); + for (i = 0; i < nr_cpu; i++) + if (cpu_online(i)) + priv->cpu_info.start[i] = + (u64)ktime_to_us(ktime_get()) - + get_cpu_idle_time_us(i, NULL); + + msm_idle_stats_idle_start(&priv->idledev); +} + +static void idlestats_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + priv->idledev.stats->event |= MSM_IDLE_STATS_EVENT_IDLE_TIMER_EXPIRED; +} + +static int idlestats_init(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv; + struct cpufreq_policy cpu_policy; + int ret, i; + + priv = pwrscale->priv = kzalloc(sizeof(struct idlestats_priv), + GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + snprintf(priv->name, sizeof(priv->name), "idle_stats_%s", + device->name); + + priv->device = device; + + priv->idledev.name = (const char *) priv->name; + priv->idledev.get_sample = idlestats_get_sample; + + spin_lock_init(&priv->cpu_info.lock); + priv->cpu_info.cpu_nb.notifier_call = + idlestats_cpufreq_notifier; + ret = cpufreq_register_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret) + goto err; + for (i = 0; i < num_possible_cpus(); i++) { + cpufreq_frequency_table_cpuinfo(&cpu_policy, + cpufreq_frequency_get_table(i)); + priv->cpu_info.max_freq[i] = cpu_policy.max / 1000; + priv->cpu_info.curr_freq[i] = cpu_policy.max / 1000; + } + ret = msm_idle_stats_register_device(&priv->idledev); +err: + if (ret) { + kfree(pwrscale->priv); + pwrscale->priv = NULL; + } + + return ret; +} + +static void idlestats_close(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct idlestats_priv *priv = pwrscale->priv; + + if (pwrscale->priv == NULL) + return; + + cpufreq_unregister_notifier(&priv->cpu_info.cpu_nb, + CPUFREQ_TRANSITION_NOTIFIER); + msm_idle_stats_deregister_device(&priv->idledev); + + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_idlestats = { + .name = "idlestats", + .init = idlestats_init, + .idle = idlestats_idle, + .busy = 
idlestats_busy, + .sleep = idlestats_sleep, + .close = idlestats_close +}; diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c new file mode 100644 index 00000000..f3e84e45 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c @@ -0,0 +1,197 @@ +/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" + +#define TZ_GOVERNOR_PERFORMANCE 0 +#define TZ_GOVERNOR_ONDEMAND 1 + +struct tz_priv { + int governor; + unsigned int no_switch_cnt; + unsigned int skip_cnt; +}; + +#define SWITCH_OFF 200 +#define SWITCH_OFF_RESET_TH 40 +#define SKIP_COUNTER 500 +#define TZ_RESET_ID 0x3 +#define TZ_UPDATE_ID 0x4 + +#ifdef CONFIG_MSM_SCM +/* Trap into the TrustZone, and call funcs there. */ +static int __secure_tz_entry(u32 cmd, u32 val) +{ + __iowmb(); + return scm_call_atomic1(SCM_SVC_IO, cmd, val); +} +#else +static int __secure_tz_entry(u32 cmd, u32 val) +{ + return 0; +} +#endif /* CONFIG_MSM_SCM */ + +static ssize_t tz_governor_show(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + char *buf) +{ + struct tz_priv *priv = pwrscale->priv; + int ret; + + if (priv->governor == TZ_GOVERNOR_ONDEMAND) + ret = snprintf(buf, 10, "ondemand\n"); + else + ret = snprintf(buf, 13, "performance\n"); + + return ret; +} + +static ssize_t tz_governor_store(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale, + const char *buf, size_t count) +{ + char str[20]; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = sscanf(buf, "%20s", str); + if (ret != 1) + return -EINVAL; + + mutex_lock(&device->mutex); + + if (!strncmp(str, "ondemand", 8)) + priv->governor = TZ_GOVERNOR_ONDEMAND; + else if (!strncmp(str, "performance", 11)) + priv->governor = TZ_GOVERNOR_PERFORMANCE; + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel); + + mutex_unlock(&device->mutex); + return count; +} + +PWRSCALE_POLICY_ATTR(governor, 0644, tz_governor_show, tz_governor_store); + +static struct attribute *tz_attrs[] = { + &policy_attr_governor.attr, + NULL +}; + +static struct attribute_group tz_attr_group = { + .attrs = tz_attrs, +}; + +static void tz_wake(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + if (device->state != KGSL_STATE_NAP && + priv->governor == TZ_GOVERNOR_ONDEMAND) + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.thermal_pwrlevel); +} + +static void tz_idle(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct tz_priv *priv = pwrscale->priv; + struct kgsl_power_stats stats; + int val; + + /* In "performance" mode the clock speed always stays + the same */ + + if (priv->governor == TZ_GOVERNOR_PERFORMANCE) + return; + + device->ftbl->power_stats(device, &stats); + if (stats.total_time == 0) + return; + + /* If the 
GPU has stayed in turbo mode for a while, * + * stop writing out values. */ + if (pwr->active_pwrlevel == 0) { + if (priv->no_switch_cnt > SWITCH_OFF) { + priv->skip_cnt++; + if (priv->skip_cnt > SKIP_COUNTER) { + priv->no_switch_cnt -= SWITCH_OFF_RESET_TH; + priv->skip_cnt = 0; + } + return; + } + priv->no_switch_cnt++; + } else { + priv->no_switch_cnt = 0; + } + + val = __secure_tz_entry(TZ_UPDATE_ID, + stats.total_time - stats.busy_time); + if (val) + kgsl_pwrctrl_pwrlevel_change(device, + pwr->active_pwrlevel + val); +} + +static void tz_sleep(struct kgsl_device *device, + struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv = pwrscale->priv; + + __secure_tz_entry(TZ_RESET_ID, 0); + priv->no_switch_cnt = 0; +} + +static int tz_init(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + struct tz_priv *priv; + + /* Trustzone is only valid for some SOCs */ + if (!(cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930())) + return -EINVAL; + + priv = pwrscale->priv = kzalloc(sizeof(struct tz_priv), GFP_KERNEL); + if (pwrscale->priv == NULL) + return -ENOMEM; + + priv->governor = TZ_GOVERNOR_ONDEMAND; + kgsl_pwrscale_policy_add_files(device, pwrscale, &tz_attr_group); + + return 0; +} + +static void tz_close(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale) +{ + kgsl_pwrscale_policy_remove_files(device, pwrscale, &tz_attr_group); + kfree(pwrscale->priv); + pwrscale->priv = NULL; +} + +struct kgsl_pwrscale_policy kgsl_pwrscale_policy_tz = { + .name = "trustzone", + .init = tz_init, + .idle = tz_idle, + .sleep = tz_sleep, + .wake = tz_wake, + .close = tz_close +}; +EXPORT_SYMBOL(kgsl_pwrscale_policy_tz); diff --git a/include/drm/kgsl_drm.h b/include/drm/kgsl_drm.h new file mode 100644 index 00000000..934bdf3f --- /dev/null +++ b/include/drm/kgsl_drm.h @@ -0,0 +1,221 @@ +/* Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of Code Aurora Forum, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
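tz_idle() above hands the measured idle time (total_time - busy_time) to the secure side and treats the return value as a power-level delta. A rough user-space model of that control loop (the real decision is made inside TrustZone via scm_call_atomic1(); the thresholds and the stand-in function here are invented):

/* Illustration only: shape of the trustzone DCVS decision loop */
#include <stdio.h>

#define NUM_PWRLEVELS 3              /* 0 = fastest ... 2 = slowest (example) */

static int active_pwrlevel = 1;

/* stand-in for __secure_tz_entry(TZ_UPDATE_ID, idle_us) */
static int fake_tz_update(unsigned int idle_us)
{
	if (idle_us > 8000)
		return 1;            /* mostly idle: step toward slower level */
	if (idle_us < 2000)
		return -1;           /* mostly busy: step toward faster level */
	return 0;
}

static void tz_idle_model(unsigned int total_us, unsigned int busy_us)
{
	int level = active_pwrlevel + fake_tz_update(total_us - busy_us);

	if (level < 0)
		level = 0;
	if (level >= NUM_PWRLEVELS)
		level = NUM_PWRLEVELS - 1;
	active_pwrlevel = level;     /* like kgsl_pwrctrl_pwrlevel_change() */
}

int main(void)
{
	tz_idle_model(10000, 9500);  /* busy sample -> raise clocks */
	printf("pwrlevel after busy sample: %d\n", active_pwrlevel);  /* 0 */
	tz_idle_model(10000, 500);   /* idle sample -> lower clocks */
	printf("pwrlevel after idle sample: %d\n", active_pwrlevel);  /* 1 */
	return 0;
}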
+ * + */ + +#ifndef _KGSL_DRM_H_ +#define _KGSL_DRM_H_ + +#include "drm.h" + +#define DRM_KGSL_GEM_CREATE 0x00 +#define DRM_KGSL_GEM_PREP 0x01 +#define DRM_KGSL_GEM_SETMEMTYPE 0x02 +#define DRM_KGSL_GEM_GETMEMTYPE 0x03 +#define DRM_KGSL_GEM_MMAP 0x04 +#define DRM_KGSL_GEM_ALLOC 0x05 +#define DRM_KGSL_GEM_BIND_GPU 0x06 +#define DRM_KGSL_GEM_UNBIND_GPU 0x07 + +#define DRM_KGSL_GEM_GET_BUFINFO 0x08 +#define DRM_KGSL_GEM_SET_BUFCOUNT 0x09 +#define DRM_KGSL_GEM_SET_ACTIVE 0x0A +#define DRM_KGSL_GEM_LOCK_HANDLE 0x0B +#define DRM_KGSL_GEM_UNLOCK_HANDLE 0x0C +#define DRM_KGSL_GEM_UNLOCK_ON_TS 0x0D +#define DRM_KGSL_GEM_CREATE_FD 0x0E + +#define DRM_IOCTL_KGSL_GEM_CREATE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE, struct drm_kgsl_gem_create) + +#define DRM_IOCTL_KGSL_GEM_PREP \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_PREP, struct drm_kgsl_gem_prep) + +#define DRM_IOCTL_KGSL_GEM_SETMEMTYPE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SETMEMTYPE, \ +struct drm_kgsl_gem_memtype) + +#define DRM_IOCTL_KGSL_GEM_GETMEMTYPE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GETMEMTYPE, \ +struct drm_kgsl_gem_memtype) + +#define DRM_IOCTL_KGSL_GEM_MMAP \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_MMAP, struct drm_kgsl_gem_mmap) + +#define DRM_IOCTL_KGSL_GEM_ALLOC \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_ALLOC, struct drm_kgsl_gem_alloc) + +#define DRM_IOCTL_KGSL_GEM_BIND_GPU \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_BIND_GPU, struct drm_kgsl_gem_bind_gpu) + +#define DRM_IOCTL_KGSL_GEM_UNBIND_GPU \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNBIND_GPU, \ +struct drm_kgsl_gem_bind_gpu) + +#define DRM_IOCTL_KGSL_GEM_GET_BUFINFO \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_GET_BUFINFO, \ + struct drm_kgsl_gem_bufinfo) + +#define DRM_IOCTL_KGSL_GEM_SET_BUFCOUNT \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_BUFCOUNT, \ + struct drm_kgsl_gem_bufcount) + +#define DRM_IOCTL_KGSL_GEM_SET_ACTIVE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_SET_ACTIVE, \ + struct drm_kgsl_gem_active) + +#define DRM_IOCTL_KGSL_GEM_LOCK_HANDLE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_LOCK_HANDLE, \ +struct drm_kgsl_gem_lock_handles) + +#define DRM_IOCTL_KGSL_GEM_UNLOCK_HANDLE \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_HANDLE, \ +struct drm_kgsl_gem_unlock_handles) + +#define DRM_IOCTL_KGSL_GEM_UNLOCK_ON_TS \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_UNLOCK_ON_TS, \ +struct drm_kgsl_gem_unlock_on_ts) + +#define DRM_IOCTL_KGSL_GEM_CREATE_FD \ +DRM_IOWR(DRM_COMMAND_BASE + DRM_KGSL_GEM_CREATE_FD, \ +struct drm_kgsl_gem_create_fd) + +/* Maximum number of sub buffers per GEM object */ +#define DRM_KGSL_GEM_MAX_BUFFERS 2 + +/* Memory types - these define the source and caching policies + of the GEM memory chunk */ + +/* Legacy definitions left for compatability */ + +#define DRM_KGSL_GEM_TYPE_EBI 0 +#define DRM_KGSL_GEM_TYPE_SMI 1 +#define DRM_KGSL_GEM_TYPE_KMEM 2 +#define DRM_KGSL_GEM_TYPE_KMEM_NOCACHE 3 +#define DRM_KGSL_GEM_TYPE_MEM_MASK 0xF + +/* Contiguous memory (PMEM) */ +#define DRM_KGSL_GEM_TYPE_PMEM 0x000100 + +/* PMEM memory types */ +#define DRM_KGSL_GEM_PMEM_EBI 0x001000 +#define DRM_KGSL_GEM_PMEM_SMI 0x002000 + +/* Standard paged memory */ +#define DRM_KGSL_GEM_TYPE_MEM 0x010000 + +/* Caching controls */ +#define DRM_KGSL_GEM_CACHE_NONE 0x000000 +#define DRM_KGSL_GEM_CACHE_WCOMBINE 0x100000 +#define DRM_KGSL_GEM_CACHE_WTHROUGH 0x200000 +#define DRM_KGSL_GEM_CACHE_WBACK 0x400000 +#define DRM_KGSL_GEM_CACHE_WBACKWA 0x800000 +#define DRM_KGSL_GEM_CACHE_MASK 0xF00000 + +/* FD based objects */ +#define 
DRM_KGSL_GEM_TYPE_FD_FBMEM 0x1000000 +#define DRM_KGSL_GEM_TYPE_FD_MASK 0xF000000 + +/* Timestamp types */ +#define DRM_KGSL_GEM_TS_3D 0x00000430 +#define DRM_KGSL_GEM_TS_2D 0x00000180 + + +struct drm_kgsl_gem_create { + uint32_t size; + uint32_t handle; +}; + +struct drm_kgsl_gem_prep { + uint32_t handle; + uint32_t phys; + uint64_t offset; +}; + +struct drm_kgsl_gem_memtype { + uint32_t handle; + uint32_t type; +}; + +struct drm_kgsl_gem_mmap { + uint32_t handle; + uint32_t size; + uint32_t hostptr; + uint64_t offset; +}; + +struct drm_kgsl_gem_alloc { + uint32_t handle; + uint64_t offset; +}; + +struct drm_kgsl_gem_bind_gpu { + uint32_t handle; + uint32_t gpuptr; +}; + +struct drm_kgsl_gem_bufinfo { + uint32_t handle; + uint32_t count; + uint32_t active; + uint32_t offset[DRM_KGSL_GEM_MAX_BUFFERS]; + uint32_t gpuaddr[DRM_KGSL_GEM_MAX_BUFFERS]; +}; + +struct drm_kgsl_gem_bufcount { + uint32_t handle; + uint32_t bufcount; +}; + +struct drm_kgsl_gem_active { + uint32_t handle; + uint32_t active; +}; + +struct drm_kgsl_gem_lock_handles { + uint32_t num_handles; + uint32_t *handle_list; + uint32_t pid; + uint32_t lock_id; /* Returned lock id used for unlocking */ +}; + +struct drm_kgsl_gem_unlock_handles { + uint32_t lock_id; +}; + +struct drm_kgsl_gem_unlock_on_ts { + uint32_t lock_id; + uint32_t timestamp; /* This field is a hw generated ts */ + uint32_t type; /* Which pipe to check for ts generation */ +}; + +struct drm_kgsl_gem_create_fd { + uint32_t fd; + uint32_t handle; +}; + +#endif From 88e7e8a6d250df2644be2edc82acdd5946ed5333 Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 19 May 2012 19:33:14 +0800 Subject: [PATCH 007/155] drivers: usb: gadget: update some usb drivers for USB Tethering. --- arch/arm/include/asm/dma-mapping.h | 39 + arch/arm/mach-msm/include/mach/board.h | 10 + arch/arm/mach-msm/include/mach/msm_hsusb.h | 27 +- arch/arm/mach-msm/include/mach/msm_hsusb_hw.h | 126 +- drivers/usb/gadget/android.c | 35 +- drivers/usb/gadget/f_adb.c | 217 ++-- drivers/usb/gadget/f_rndis.c | 12 +- drivers/usb/gadget/msm72k_udc.c | 1074 +++++++++++++---- drivers/usb/gadget/rndis.c | 10 +- 9 files changed, 1144 insertions(+), 406 deletions(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 26b76345..f7724976 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -129,6 +129,45 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, { } +/* + * dma_coherent_pre_ops - barrier functions for coherent memory before DMA. + * A barrier is required to ensure memory operations are complete before the + * initiation of a DMA xfer. + * If the coherent memory is Strongly Ordered + * - pre ARMv7 and 8x50 guarantees ordering wrt other mem accesses + * - ARMv7 guarantees ordering only within a 1KB block, so we need a barrier + * If coherent memory is normal then we need a barrier to prevent + * reordering + */ +static inline void dma_coherent_pre_ops(void) +{ +#if (__LINUX_ARM_ARCH__ >= 7) + dmb(); +#else + if (arch_is_coherent()) + dmb(); + else + barrier(); +#endif +} +/* + * dma_post_coherent_ops - barrier functions for coherent memory after DMA. + * If the coherent memory is Strongly Ordered we dont need a barrier since + * there are no speculative fetches to Strongly Ordered memory. 
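dma_coherent_pre_ops()/dma_coherent_post_ops() exist to order descriptor writes in coherent memory against the register access that starts or completes a transfer. A rough user-space sketch of the "barrier before the doorbell" pattern, with __sync_synchronize() standing in for dmb() and an invented descriptor/doorbell pair:

/* Illustration only: publish the descriptor before kicking the device */
#include <stdio.h>

struct toy_desc {
	unsigned int addr;
	unsigned int len;
	unsigned int ready;
};

static struct toy_desc desc;           /* imagine DMA-coherent memory */
static volatile unsigned int doorbell; /* imagine a device register */

static void start_transfer(unsigned int addr, unsigned int len)
{
	desc.addr = addr;
	desc.len = len;
	desc.ready = 1;

	/* like dma_coherent_pre_ops(): descriptor must be visible ... */
	__sync_synchronize();

	/* ... before the device is told to fetch it */
	doorbell = 1;
}

int main(void)
{
	start_transfer(0x1000, 256);
	printf("desc %#x/%u ready=%u, doorbell=%u\n",
	       desc.addr, desc.len, desc.ready, doorbell);
	return 0;
}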
+ * If coherent memory is normal then we need a barrier to prevent reordering + */ +static inline void dma_coherent_post_ops(void) +{ +#if (__LINUX_ARM_ARCH__ >= 7) + dmb(); +#else + if (arch_is_coherent()) + dmb(); + else + barrier(); +#endif +} + /** * dma_alloc_coherent - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm/mach-msm/include/mach/board.h b/arch/arm/mach-msm/include/mach/board.h index cebfe9d0..7da0c22e 100644 --- a/arch/arm/mach-msm/include/mach/board.h +++ b/arch/arm/mach-msm/include/mach/board.h @@ -179,6 +179,16 @@ enum { }; void msm_hsusb_set_vbus_state(int online); +enum usb_connect_type { + CONNECT_TYPE_CLEAR = -2, + CONNECT_TYPE_UNKNOWN = -1, + CONNECT_TYPE_NONE = 0, + CONNECT_TYPE_USB, + CONNECT_TYPE_AC, + CONNECT_TYPE_9V_AC, + CONNECT_TYPE_WIRELESS, + CONNECT_TYPE_INTERNAL, +}; #define MSM_MAX_DEC_CNT 14 /* 7k target ADSP information */ diff --git a/arch/arm/mach-msm/include/mach/msm_hsusb.h b/arch/arm/mach-msm/include/mach/msm_hsusb.h index bfd174f7..4036ec18 100644 --- a/arch/arm/mach-msm/include/mach/msm_hsusb.h +++ b/arch/arm/mach-msm/include/mach/msm_hsusb.h @@ -47,8 +47,18 @@ struct msm_hsusb_platform_data { /* 1 : uart, 0 : usb */ void (*usb_uart_switch)(int); void (*config_usb_id_gpios)(bool enable); - /* val, reg pairs terminated by -1 */ - int *phy_init_seq; + void (*usb_hub_enable)(bool); + void (*serial_debug_gpios)(int); + int (*china_ac_detect)(void); + void (*disable_usb_charger)(void); + /* val, reg pairs terminated by -1 */ + int *phy_init_seq; + void (*change_phy_voltage)(int); + int (*ldo_init) (int init); + int (*ldo_enable) (int enable); + int (*rpc_connect)(int); + /* 1 : mhl, 0 : usb */ + void (*usb_mhl_switch)(bool); #ifdef CONFIG_USB_FUNCTION /* USB device descriptor fields */ @@ -74,10 +84,15 @@ struct msm_hsusb_platform_data { int num_products; struct msm_hsusb_product *products; #endif - char *serial_number; - int usb_id_pin_gpio; - bool enable_car_kit_detect; - __u8 accessory_detect; + char *serial_number; + int usb_id_pin_gpio; + int dock_pin_gpio; + int id_pin_irq; + bool enable_car_kit_detect; + __u8 accessory_detect; + bool dock_detect; + + int ac_9v_gpio; }; int usb_get_connect_type(void); diff --git a/arch/arm/mach-msm/include/mach/msm_hsusb_hw.h b/arch/arm/mach-msm/include/mach/msm_hsusb_hw.h index e2c86c18..ca2f794c 100644 --- a/arch/arm/mach-msm/include/mach/msm_hsusb_hw.h +++ b/arch/arm/mach-msm/include/mach/msm_hsusb_hw.h @@ -37,11 +37,30 @@ do { } while (0) #endif /* VERBOSE */ +#ifndef __LINUX_USB_COMPOSITE_H #define ERROR(fmt,args...) \ xprintk(KERN_ERR , fmt , ## args) #define INFO(fmt,args...) \ xprintk(KERN_INFO , fmt , ## args) +#endif +#define USB_ERR(fmt, args...) \ + printk(KERN_ERR "[USB:ERR] " fmt, ## args) +#define USB_WARNING(fmt, args...) \ + printk(KERN_WARNING "[USB] " fmt, ## args) +#define USB_INFO(fmt, args...) \ + printk(KERN_INFO "[USB] " fmt, ## args) +#define USB_DEBUG(fmt, args...) \ + printk(KERN_DEBUG "[USB] " fmt, ## args) + +#define USBH_ERR(fmt, args...) \ + printk(KERN_ERR "[USBH:ERR] " fmt, ## args) +#define USBH_WARNING(fmt, args...) \ + printk(KERN_WARNING "[USBH] " fmt, ## args) +#define USBH_INFO(fmt, args...) \ + printk(KERN_INFO "[USBH] " fmt, ## args) +#define USBH_DEBUG(fmt, args...) 
\ + printk(KERN_DEBUG "[USBH] " fmt, ## args) /*-------------------------------------------------------------------------*/ @@ -51,9 +70,12 @@ #define USB_HWDEVICE (MSM_USB_BASE + 0x000C) #define USB_HWTXBUF (MSM_USB_BASE + 0x0010) #define USB_HWRXBUF (MSM_USB_BASE + 0x0014) -#define USB_AHBBURST (MSM_USB_BASE + 0x0090) -#define USB_AHBMODE (MSM_USB_BASE + 0x0098) +#define USB_AHB_BURST (MSM_USB_BASE + 0x0090) +#define USB_AHB_MODE (MSM_USB_BASE + 0x0098) +#define USB_AHBBURST (USB_AHB_BURST) +#define USB_AHBMODE (USB_AHB_MODE) #define USB_SBUSCFG (MSM_USB_BASE + 0x0090) +#define USB_ROC_AHB_MODE (MSM_USB_BASE + 0x0090) #define USB_CAPLENGTH (MSM_USB_BASE + 0x0100) /* 8 bit */ #define USB_HCIVERSION (MSM_USB_BASE + 0x0102) /* 16 bit */ @@ -82,12 +104,26 @@ #define USB_ENDPTCTRL(n) (MSM_USB_BASE + 0x01C0 + (4 * (n))) -#define USBCMD_RESET 2 -#define USBCMD_ATTACH 1 -#define USBCMD_ATDTW (1 << 14) +#define USBCMD_RESET 2 +#define USBCMD_ATTACH 1 +#define USBCMD_RS (1 << 0) /* run/stop bit */ +#define USBCMD_ATDTW (1 << 14) + +#define ASYNC_INTR_CTRL (1 << 29) +#define ULPI_STP_CTRL (1 << 30) +#define USBCMD_ITC(n) (n << 16) +#define USBCMD_ITC_MASK (0xFF << 16) + #define USBMODE_DEVICE 2 #define USBMODE_HOST 3 +/* Redefining SDIS bit as it defined incorrectly in ehci.h. */ +#ifdef USBMODE_SDIS +#undef USBMODE_SDIS +#endif +#define USBMODE_SDIS (1 << 4) /* stream disable */ +#define USBMODE_VBUS (1 << 5) /* vbus power select */ + struct ept_queue_head { unsigned config; @@ -138,7 +174,7 @@ struct ept_queue_item { #define STS_NAKI (1 << 16) /* */ #define STS_SLI (1 << 8) /* R/WC - suspend state entered */ #define STS_SRI (1 << 7) /* R/WC - SOF recv'd */ -#define STS_URI (1 << 6) /* R/WC - RESET recv'd - write to clear */ +#define STS_URI (1 << 6) /* R/WC - RESET recv'd */ #define STS_FRI (1 << 3) /* R/WC - Frame List Rollover */ #define STS_PCI (1 << 2) /* R/WC - Port Change Detect */ #define STS_UEI (1 << 1) /* R/WC - USB Error */ @@ -175,6 +211,38 @@ struct ept_queue_item { #define CTRL_RXT_INT (3 << 2) #define CTRL_RXT_EP_TYPE_SHIFT 2 +#if defined(CONFIG_ARCH_MSM7X30) || defined(CONFIG_ARCH_MSM8X60) +#define ULPI_DIGOUT_CTRL 0X36 +#define ULPI_CDR_AUTORESET (1 << 1) +#else +#define ULPI_DIGOUT_CTRL 0X31 +#define ULPI_CDR_AUTORESET (1 << 5) +#endif + +#define ULPI_FUNC_CTRL_CLR (0x06) +#define ULPI_IFC_CTRL_CLR (0x09) +#define ULPI_AMPLITUDE_MAX (0x0C) +#define ULPI_OTG_CTRL (0x0B) +#define ULPI_OTG_CTRL_CLR (0x0C) +#define ULPI_INT_RISE_CLR (0x0F) +#define ULPI_INT_FALL_CLR (0x12) +#define ULPI_DEBUG_REG (0x15) +#define ULPI_SCRATCH_REG (0x16) +#define ULPI_CONFIG_REG1 (0x30) +#define ULPI_CONFIG_REG2 (0X31) +#define ULPI_CONFIG_REG (0x31) +#define ULPI_CONFIG_REG3 (0X32) +#define ULPI_CHG_DETECT_REG (0x34) +#define ULPI_PRE_EMPHASIS_MASK (3 << 4) +#define ULPI_DRV_AMPL_MASK (3 << 2) +#define ULPI_ONCLOCK (1 << 6) +#define ULPI_FUNC_SUSPENDM (1 << 6) +#define ULPI_IDPU (1 << 0) +#define ULPI_HOST_DISCONNECT (1 << 0) +#define ULPI_VBUS_VALID (1 << 1) +#define ULPI_SE1_GATE (1 << 2) +#define ULPI_SESS_END (1 << 3) +#define ULPI_ID_GND (1 << 4) #define ULPI_WAKEUP (1 << 31) #define ULPI_RUN (1 << 30) #define ULPI_WRITE (1 << 29) @@ -184,12 +252,17 @@ struct ept_queue_item { #define ULPI_DATA(n) ((n) & 255) #define ULPI_DATA_READ(n) (((n) >> 8) & 255) -#define ULPI_DEBUG_REG (0x15) -#define ULPI_SCRATCH_REG (0x16) +/* control charger detection by ULPI or externally */ +#define ULPI_EXTCHGCTRL_65NM (1 << 2) +#define ULPI_EXTCHGCTRL_180NM (1 << 3) -#define ULPI_FUNC_CTRL_CLR (0x06) -#define 
ULPI_FUNC_SUSPENDM (1 << 6) +/* charger detection power on control */ +#define ULPI_CHGDETON (1 << 1) +/* enable charger detection */ +#define ULPI_CHGDETEN (1 << 0) +#define ULPI_CHGTYPE_65NM (1 << 3) +#define ULPI_CHGTYPE_180NM (1 << 4) /* USB_PORTSC bits for determining port speed */ #define PORTSC_PSPD_FS (0 << 26) @@ -218,6 +291,30 @@ struct ept_queue_item { #define PORTSC_FPR (1 << 6) /* R/W - State normal => suspend */ #define PORTSC_SUSP (1 << 7) /* Read - Port in suspend state */ #define PORTSC_LS (3 << 10) /* Read - Port's Line status */ +#define PORTSC_PHCD (1 << 23) /* phy suspend mode */ +#define PORTSC_CCS (1 << 0) /* current connect status */ +#define PORTSC_PTS (3 << 30) +#define PORTSC_PTS_ULPI (2 << 30) +#define PORTSC_PTS_SERIAL (3 << 30) + +#define PORTSC_PORT_SPEED_FULL 0x00000000 +#define PORTSC_PORT_SPEED_LOW 0x04000000 +#define PORTSC_PORT_SPEED_HIGH 0x08000000 +#define PORTSC_PORT_SPEED_MASK 0x0c000000 + +#define SBUSCFG_AHBBRST_INCR4 0x01 +#define ULPI_USBINTR_ENABLE_FALLING_S 0x11 +#define ULPI_USBINTR_ENABLE_FALLING_C 0x12 +#define ULPI_USBINTR_STATUS 0x13 +#define ULPI_USBINTR_ENABLE_RASING_S 0x0E +#define ULPI_USBINTR_ENABLE_RASING_C 0x0F +#define ULPI_SESSION_END_RAISE (1 << 3) +#define ULPI_SESSION_END_FALL (1 << 3) +#define ULPI_SESSION_VALID_RAISE (1 << 2) +#define ULPI_SESSION_VALID_FALL (1 << 2) +#define ULPI_VBUS_VALID_RAISE (1 << 1) +#define ULPI_VBUS_VALID_FALL (1 << 1) + #define PORTSC_PHCD (1 << 23) /* phy suspend mode */ #define PORTSC_CCS (1 << 0) /* current connect status */ #define PORTSC_PTS (3 << 30) @@ -238,6 +335,9 @@ struct ept_queue_item { #define PORTSC_PTC_SE0_NAK (0x03 << 16) #define PORTSC_PTC_TST_PKT (0x04 << 16) +#define USBH (1 << 15) +#define USB_PHY (1 << 18) + #define PORTSC_PTS_MASK (3 << 30) #define PORTSC_PTS_ULPI (2 << 30) #define PORTSC_PTS_SERIAL (3 << 30) @@ -250,5 +350,9 @@ struct ept_queue_item { #define PORTSC_PHCD (1 << 23) /* phy suspend mode */ #define ULPI_DEBUG 0x15 +#define ULPI_CLOCK_SUSPENDM (1 << 3) #define ULPI_SUSPENDM (1 << 6) -#endif /* _USB_FUNCTION_MSM_HSUSB_HW_H */ +#define ULPI_CALIB_STS (1 << 7) +#define ULPI_CALIB_VAL(x) (x & 0x7C) + +#endif /* __LINUX_USB_GADGET_MSM72K_UDC_H__ */ diff --git a/drivers/usb/gadget/android.c b/drivers/usb/gadget/android.c index f2a3f3ee..dc95c20e 100644 --- a/drivers/usb/gadget/android.c +++ b/drivers/usb/gadget/android.c @@ -21,11 +21,11 @@ #include #include #include - #include #include #include #include +#include #include #include @@ -33,6 +33,8 @@ #include #include "gadget_chips.h" +#include +#include /* * Kbuild is not very cooperative with respect to linking separately @@ -52,6 +54,8 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); static const char longname[] = "Gadget Android"; +static struct wake_lock usb_rndis_idle_wake_lock; +static struct perf_lock usb_rndis_perf_lock; enum { USB_FUNCTION_UMS = 0, @@ -478,11 +482,14 @@ int android_switch_function(unsigned func) dev->cdev->desc.bDeviceClass = USB_CLASS_PER_INTERFACE; #ifdef CONFIG_USB_GADGET_MSM_72K + /* avoid sending a disconnect switch event until after we disconnect */ msm_hsusb_request_reset(); #else /* force reenumeration */ if (dev->cdev && dev->cdev->gadget && dev->cdev->gadget->speed != USB_SPEED_UNKNOWN) { + + /* avoid sending a disconnect switch event until after we disconnect */ usb_gadget_disconnect(dev->cdev->gadget); msleep(10); usb_gadget_connect(dev->cdev->gadget); @@ -500,6 +507,7 @@ void android_enable_function(struct usb_function *f, int enable) if (!!f->hidden != disable) { f->hidden = 
disable; + #ifdef CONFIG_USB_ANDROID_RNDIS if (!strcmp(f->name, "rndis")) { struct usb_function *func; @@ -565,6 +573,9 @@ static int __init android_probe(struct platform_device *pdev) printk(KERN_INFO "android_probe pdata: %p\n", pdata); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + if (pdata) { dev->products = pdata->products; dev->num_products = pdata->num_products; @@ -593,8 +604,26 @@ static int __init android_probe(struct platform_device *pdev) return usb_composite_register(&android_usb_driver); } +static int andr_runtime_suspend(struct device *dev) +{ + dev_dbg(dev, "pm_runtime: suspending...\n"); + return 0; +} + +static int andr_runtime_resume(struct device *dev) +{ + dev_dbg(dev, "pm_runtime: resuming...\n"); + return 0; +} + +static struct dev_pm_ops andr_dev_pm_ops = { + .runtime_suspend = andr_runtime_suspend, + .runtime_resume = andr_runtime_resume, +}; static struct platform_driver android_platform_driver = { - .driver = { .name = "android_usb", }, + .driver = { + .name = "android_usb", + .pm = &andr_dev_pm_ops}, .probe = android_probe, }; @@ -612,6 +641,8 @@ static int __init init(void) dev->product_id = PRODUCT_ID; _android_dev = dev; + wake_lock_init(&usb_rndis_idle_wake_lock, WAKE_LOCK_IDLE, "rndis_idle_lock"); + perf_lock_init(&usb_rndis_perf_lock, PERF_LOCK_HIGHEST, "rndis"); return platform_driver_register(&android_platform_driver); } module_init(init); diff --git a/drivers/usb/gadget/f_adb.c b/drivers/usb/gadget/f_adb.c index 39fc2f17..b9f58126 100644 --- a/drivers/usb/gadget/f_adb.c +++ b/drivers/usb/gadget/f_adb.c @@ -31,13 +31,11 @@ #include #include -#include #define BULK_BUFFER_SIZE 4096 /* number of tx requests to allocate */ #define TX_REQ_MAX 4 -#define RX_REQ_MAX 32 static const char shortname[] = "android_adb"; @@ -57,18 +55,11 @@ struct adb_dev { atomic_t open_excl; struct list_head tx_idle; - struct list_head rx_idle; - struct list_head rx_done; wait_queue_head_t read_wq; wait_queue_head_t write_wq; - - /* the request we're currently reading from */ - struct usb_request *read_req; - unsigned char *read_buf; - unsigned read_count; - - int maxsize; + struct usb_request *rx_req; + int rx_done; }; static struct usb_interface_descriptor adb_interface_desc = { @@ -125,22 +116,6 @@ static struct usb_descriptor_header *hs_adb_descs[] = { NULL, }; -/* string descriptors: */ - -static struct usb_string adb_string_defs[] = { - [0].s = "ADB", - { } /* end of list */ -}; - -static struct usb_gadget_strings adb_string_table = { - .language = 0x0409, /* en-us */ - .strings = adb_string_defs, -}; - -static struct usb_gadget_strings *adb_strings[] = { - &adb_string_table, - NULL, -}; /* temporary variable used between adb_open() and adb_gadget_bind() */ static struct adb_dev *_adb_dev; @@ -179,12 +154,16 @@ static void adb_request_free(struct usb_request *req, struct usb_ep *ep) static inline int _lock(atomic_t *excl) { + int ret = -1; + + preempt_disable(); if (atomic_inc_return(excl) == 1) { - return 0; - } else { + ret = 0; + } else atomic_dec(excl); - return -1; - } + + preempt_enable(); + return ret; } static inline void _unlock(atomic_t *excl) @@ -236,11 +215,9 @@ static void adb_complete_out(struct usb_ep *ep, struct usb_request *req) { struct adb_dev *dev = _adb_dev; - if (req->status != 0) { + dev->rx_done = 1; + if (req->status != 0) dev->error = 1; - req_put(dev, &dev->rx_idle, req); - } else - req_put(dev, &dev->rx_done, req); wake_up(&dev->read_wq); } @@ -275,13 +252,11 @@ static int __init create_bulk_endpoints(struct adb_dev *dev, 
dev->ep_out = ep; /* now allocate requests for our endpoints */ - for (i = 0; i < RX_REQ_MAX; i++) { - req = adb_request_new(dev->ep_out, 512); - if (!req) - goto fail; - req->complete = adb_complete_out; - req_put(dev, &dev->rx_idle, req); - } + req = adb_request_new(dev->ep_out, BULK_BUFFER_SIZE); + if (!req) + goto fail; + req->complete = adb_complete_out; + dev->rx_req = req; for (i = 0; i < TX_REQ_MAX; i++) { req = adb_request_new(dev->ep_in, BULK_BUFFER_SIZE); @@ -329,71 +304,41 @@ static ssize_t adb_read(struct file *fp, char __user *buf, r = -EIO; goto done; } - while (count > 0) { - if (dev->error) { - r = -EIO; - break; - } - /* if we have idle read requests, get them queued */ - while ((req = req_get(dev, &dev->rx_idle))) { requeue_req: - req->length = dev->maxsize?dev->maxsize:512; - ret = usb_ep_queue(dev->ep_out, req, GFP_ATOMIC); - if (ret < 0) { - printk(KERN_INFO "adb_read: failed to queue req (%d)\n", ret); - r = -EIO; - dev->error = 1; - req_put(dev, &dev->rx_idle, req); - goto done; - } - } - - /* if we have data pending, give it to userspace */ - if (dev->read_count > 0) { - xfer = (dev->read_count < count) ? dev->read_count : count; - - if (copy_to_user(buf, dev->read_buf, xfer)) { - r = -EFAULT; - break; - } - dev->read_buf += xfer; - dev->read_count -= xfer; - buf += xfer; - count -= xfer; - - /* if we've emptied the buffer, release the request */ - if (dev->read_count == 0) { - req_put(dev, &dev->rx_idle, dev->read_req); - dev->read_req = 0; - } - continue; - } - - /* wait for a request to complete */ - req = 0; - ret = wait_event_interruptible(dev->read_wq, - ((req = req_get(dev, &dev->rx_done)) || dev->error)); - - if (req != 0) { - /* if we got a 0-len one we need to put it back into - ** service. if we made it the current read req we'd - ** be stuck forever - */ - if (req->actual == 0) - goto requeue_req; - - dev->read_req = req; - dev->read_count = req->actual; - dev->read_buf = req->buf; - } - - if (ret < 0) { - r = ret; - break; - } + /* queue a request */ + req = dev->rx_req; + req->length = count; + dev->rx_done = 0; + ret = usb_ep_queue(dev->ep_out, req, GFP_ATOMIC); + if (ret < 0) { + DBG(cdev, "adb_read: failed to queue req %p (%d)\n", req, ret); + r = -EIO; + dev->error = 1; + goto done; + } else { + DBG(cdev, "rx %p queue\n", req); } + /* wait for a request to complete */ + ret = wait_event_interruptible(dev->read_wq, dev->rx_done); + if (ret < 0) { + dev->error = 1; + r = ret; + goto done; + } + if (!dev->error) { + /* If we got a 0-len packet, throw it back and try again. */ + if (req->actual == 0) + goto requeue_req; + + DBG(cdev, "rx %p %d\n", req, req->actual); + xfer = (req->actual < count) ? 
req->actual : count; + if (copy_to_user(buf, req->buf, xfer)) + r = -EFAULT; + } else + r = -EIO; + done: _unlock(&dev->read_excl); DBG(cdev, "adb_read returning %d\n", r); @@ -468,9 +413,25 @@ static ssize_t adb_write(struct file *fp, const char __user *buf, static int adb_open(struct inode *ip, struct file *fp) { - printk(KERN_INFO "adb_open\n"); - if (_lock(&_adb_dev->open_excl)) + static unsigned long last_print; + static unsigned long count = 0; + + if (++count == 1) + last_print = jiffies; + else { + if (!time_before(jiffies, last_print + HZ/2)) + count = 0; + last_print = jiffies; + } + + if (_lock(&_adb_dev->open_excl)) { + cpu_relax(); return -EBUSY; + } + + if (count < 5) + printk(KERN_INFO "adb_open(%s)\n", current->comm); + fp->private_data = _adb_dev; @@ -482,7 +443,19 @@ static int adb_open(struct inode *ip, struct file *fp) static int adb_release(struct inode *ip, struct file *fp) { - printk(KERN_INFO "adb_release\n"); + static unsigned long last_print; + static unsigned long count = 0; + + if (++count == 1) + last_print = jiffies; + else { + if (!time_before(jiffies, last_print + HZ/2)) + count = 0; + last_print = jiffies; + } + + if (count < 5) + printk(KERN_INFO "adb_release\n"); _unlock(&_adb_dev->open_excl); return 0; } @@ -580,10 +553,7 @@ adb_function_unbind(struct usb_configuration *c, struct usb_function *f) spin_lock_irq(&dev->lock); - while ((req = req_get(dev, &dev->rx_done))) - adb_request_free(req, dev->ep_out); - while ((req = req_get(dev, &dev->rx_idle))) - adb_request_free(req, dev->ep_out); + adb_request_free(dev->rx_req, dev->ep_out); while ((req = req_get(dev, &dev->tx_idle))) adb_request_free(req, dev->ep_in); @@ -603,7 +573,6 @@ static int adb_function_set_alt(struct usb_function *f, struct adb_dev *dev = func_to_dev(f); struct usb_composite_dev *cdev = f->config->cdev; int ret; - struct usb_request *req; DBG(cdev, "adb_function_set_alt intf: %d alt: %d\n", intf, alt); ret = usb_ep_enable(dev->ep_in, @@ -620,17 +589,7 @@ static int adb_function_set_alt(struct usb_function *f, usb_ep_disable(dev->ep_in); return ret; } - if (cdev->gadget->speed == USB_SPEED_FULL) - dev->maxsize = 64; - else - dev->maxsize = 512; - printk(KERN_INFO "%s: maxsize = %d\n", __func__, dev->maxsize); - - /* retire any completed rx requests from previous session */ - while ((req = req_get(dev, &dev->rx_done))) - req_put(dev, &dev->rx_idle, req); - - dev->online = !dev->function.hidden; + dev->online = 1; /* readers may be blocked waiting for us to go online */ wake_up(&dev->read_wq); @@ -645,7 +604,6 @@ static void adb_function_disable(struct usb_function *f) DBG(cdev, "adb_function_disable\n"); dev->online = 0; dev->error = 1; - dev->maxsize = 0; usb_ep_disable(dev->ep_in); usb_ep_disable(dev->ep_out); @@ -676,27 +634,18 @@ static int adb_bind_config(struct usb_configuration *c) atomic_set(&dev->write_excl, 0); INIT_LIST_HEAD(&dev->tx_idle); - INIT_LIST_HEAD(&dev->rx_idle); - INIT_LIST_HEAD(&dev->rx_done); - ret = usb_string_id(c->cdev); - if (ret < 0) - return ret; - adb_string_defs[0].id = ret; - adb_interface_desc.iInterface = ret; dev->cdev = c->cdev; dev->function.name = "adb"; - dev->function.strings = adb_strings; dev->function.descriptors = fs_adb_descs; dev->function.hs_descriptors = hs_adb_descs; dev->function.bind = adb_function_bind; dev->function.unbind = adb_function_unbind; dev->function.set_alt = adb_function_set_alt; dev->function.disable = adb_function_disable; - dev->maxsize = 512; - if (board_mfg_mode() != 2) - dev->function.hidden = 1; + /* start disabled */ 
+// dev->function.disabled = 1; /* _adb_dev must be set before calling usb_gadget_register_driver */ _adb_dev = dev; diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 689093b2..e7abad59 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -4,6 +4,8 @@ * Copyright (C) 2003-2005,2008 David Brownell * Copyright (C) 2003-2004 Robert Schwebel, Benedikt Spranger * Copyright (C) 2008 Nokia Corporation + * Copyright (C) 2009 Samsung Electronics + * Author: Michal Nazarewicz (m.nazarewicz@samsung.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,6 +24,7 @@ /* #define VERBOSE_DEBUG */ +#include #include #include #include @@ -84,6 +87,8 @@ struct f_rndis { struct gether port; u8 ctrl_id, data_id; u8 ethaddr[ETH_ALEN]; + u32 vendorID; + const char *manufacturer; int config; struct rndis_ep_descs fs; @@ -95,6 +100,8 @@ struct f_rndis { atomic_t notify_count; }; +static char manufacturer [10] = "HTC"; +static u32 vendorID = 0x0bb4; static inline struct f_rndis *func_to_rndis(struct usb_function *f) { return container_of(f, struct f_rndis, port.func); @@ -412,8 +419,7 @@ rndis_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) */ case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8) | USB_CDC_SEND_ENCAPSULATED_COMMAND: - if (w_length > req->length || w_value - || w_index != rndis->ctrl_id) + if (w_value || w_index != rndis->ctrl_id) goto invalid; /* read the request; process it later */ value = w_length; @@ -820,6 +826,8 @@ int __init rndis_bind_config(struct usb_configuration *c, u8 ethaddr[ETH_ALEN]) goto fail; memcpy(rndis->ethaddr, ethaddr, ETH_ALEN); + rndis->vendorID = vendorID; + rndis->manufacturer = manufacturer; /* RNDIS activates when the host changes this filter */ rndis->port.cdc_filter = 0; diff --git a/drivers/usb/gadget/msm72k_udc.c b/drivers/usb/gadget/msm72k_udc.c index be1fd75f..4986e03f 100644 --- a/drivers/usb/gadget/msm72k_udc.c +++ b/drivers/usb/gadget/msm72k_udc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,10 @@ #ifdef CONFIG_USB_ACCESSORY_DETECT_BY_ADC #include #endif +#ifdef CONFIG_CABLE_DETECT_ACCESSORY +#include +#endif +#include static const char driver_name[] = "msm72k_udc"; @@ -63,7 +68,6 @@ static const char driver_name[] = "msm72k_udc"; #define SETUP_BUF_SIZE 4096 -typedef void (*completion_func)(struct usb_ep *ep, struct usb_request *req); static const char *const ep_name[] = { "ep0out", "ep1out", "ep2out", "ep3out", @@ -81,25 +85,41 @@ static struct usb_info *the_usb_info; static int vbus; static int use_mfg_serialno; static char mfg_df_serialno[16]; +static int disable_charger; -#ifdef CONFIG_USB_ACCESSORY_DETECT +#if defined (CONFIG_DOCK_ACCESSORY_DETECT) || defined(CONFIG_USB_ACCESSORY_DETECT) #ifdef CONFIG_USB_ACCESSORY_DETECT_BY_ADC extern int htc_get_usb_accessory_adc_level(uint32_t *buffer); #endif + + static struct switch_dev dock_switch = { .name = "dock", }; -#define DOCK_STATE_UNDOCKED 0 -#define DOCK_STATE_DESK (1 << 0) -#define DOCK_STATE_CAR (1 << 1) +#define DOCK_STATE_UNDOCKED 0 +#define DOCK_STATE_DESK (1 << 0) +#define DOCK_STATE_CAR (1 << 1) +#define DOCK_STATE_USB_HEADSET (1 << 2) +#define DOCK_STATE_MHL (1 << 3) +#define DOCK_STATE_CREDLE (1 << 4) + +#define DOCK_DET_DELAY HZ/4 #endif +#include +#include + +static struct wake_lock vbus_idle_wake_lock; +static struct perf_lock usb_perf_lock; + struct msm_request { 
struct usb_request req; /* saved copy of req.complete */ - completion_func gadget_complete; + void (*gadget_complete)(struct usb_ep *ep, + struct usb_request *req); + struct usb_info *ui; struct msm_request *next; @@ -138,10 +158,16 @@ struct msm_endpoint { }; static void usb_do_work(struct work_struct *w); +static void do_usb_hub_disable(struct work_struct *w); static void check_charger(struct work_struct *w); #ifdef CONFIG_USB_ACCESSORY_DETECT static void accessory_detect_work(struct work_struct *w); #endif +#ifdef CONFIG_DOCK_ACCESSORY_DETECT +static void dock_isr_work(struct work_struct *w); +static void dock_detect_work(struct work_struct *w); +static void dock_detect_init(struct usb_info *ui); +#endif extern int android_switch_function(unsigned func); extern int android_show_function(char *buf); extern void android_set_serialno(char *serialno); @@ -154,13 +180,7 @@ extern void android_set_serialno(char *serialno); #define USB_FLAG_VBUS_ONLINE 0x0002 #define USB_FLAG_VBUS_OFFLINE 0x0004 #define USB_FLAG_RESET 0x0008 - -enum usb_connect_type { - CONNECT_TYPE_NONE = 0, - CONNECT_TYPE_USB, - CONNECT_TYPE_AC, - CONNECT_TYPE_UNKNOWN, -}; +#define USB_FLAG_CONFIGURED 0x0020 struct usb_info { /* lock for register/queue/device state changes */ @@ -176,8 +196,8 @@ struct usb_info { unsigned state; unsigned flags; - unsigned online:1; - unsigned running:1; + atomic_t online; + atomic_t running; struct dma_pool *pool; @@ -200,8 +220,17 @@ struct usb_info { void (*phy_reset)(void); void (*hw_reset)(bool en); void (*usb_uart_switch)(int); + void (*serial_debug_gpios)(int); + void (*usb_hub_enable)(bool); + int (*china_ac_detect)(void); + void (*disable_usb_charger)(void); + void (*change_phy_voltage)(int); + int (*ldo_init) (int init); + int (*ldo_enable) (int enable); + void (*usb_mhl_switch)(bool); /* for notification when USB is connected or disconnected */ + int connect_type_ready; void (*usb_connected)(int); struct workqueue_struct *usb_wq; @@ -209,11 +238,13 @@ struct usb_info { struct delayed_work chg_work; struct work_struct detect_work; struct work_struct notifier_work; + struct work_struct usb_hub_work; unsigned phy_status; unsigned phy_fail_count; struct usb_gadget gadget; struct usb_gadget_driver *driver; + struct switch_dev sdev; #define ep0out ept[0] #define ep0in ept[16] @@ -223,22 +254,35 @@ struct usb_info { struct clk *pclk; struct clk *otgclk; struct clk *ebi1clk; + struct clk *pclk_src; - unsigned int ep0_dir; - u16 test_mode; + atomic_t ep0_dir; + atomic_t test_mode; - u8 remote_wakeup; + atomic_t remote_wakeup; enum usb_connect_type connect_type; u8 in_lpm; /* for accessory detection */ + bool dock_detect; u8 accessory_detect; u8 mfg_usb_carkit_enable; int idpin_irq; int usb_id_pin_gpio; + + int dockpin_irq; + int dock_pin_gpio; + uint8_t dock_pin_state; + struct delayed_work dock_work_isr; + struct delayed_work dock_work; + void (*config_usb_id_gpios)(bool output_enable); - /* 0: none, 1: carkit, 2: usb headset */ + /* 0: none, 1: carkit, 2: usb headset, 4: mhl */ u8 accessory_type; + struct timer_list ac_detect_timer; + int ac_detect_count; + int ac_9v_gpio; + char *pclk_src_name; }; static const struct usb_ep_ops msm72k_ep_ops; @@ -257,7 +301,8 @@ static void send_usb_connect_notify(struct work_struct *w) if (!ui) return; - printk(KERN_INFO "usb: send connect type %d\n", ui->connect_type); + ui->connect_type_ready = 1; + USB_INFO("send connect type %d\n", ui->connect_type); mutex_lock(¬ify_sem); list_for_each_entry(notifier, &g_lh_usb_notifier_list, @@ -286,6 +331,16 @@ 
int usb_register_notifier(struct t_usb_status_notifier *notifier) return 0; } +static ssize_t print_switch_name(struct switch_dev *sdev, char *buf) +{ + return sprintf(buf, "%s\n", driver_name); +} + +static ssize_t print_switch_state(struct switch_dev *sdev, char *buf) +{ + return sprintf(buf, "%s\n", sdev->state ? "online" : "offline"); +} + static int usb_ep_get_stall(struct msm_endpoint *ept) { unsigned int n; @@ -310,7 +365,7 @@ static unsigned ulpi_read(struct usb_info *ui, unsigned reg) while ((readl(USB_ULPI_VIEWPORT) & ULPI_RUN) && (--timeout)) ; if (timeout == 0) { - ERROR("ulpi_read: timeout %08x\n", readl(USB_ULPI_VIEWPORT)); + USB_ERR("ulpi_read: timeout %08x\n", readl(USB_ULPI_VIEWPORT)); return 0xffffffff; } return ULPI_DATA_READ(readl(USB_ULPI_VIEWPORT)); @@ -326,10 +381,10 @@ static int ulpi_write(struct usb_info *ui, unsigned val, unsigned reg) USB_ULPI_VIEWPORT); /* wait for completion */ - while((readl(USB_ULPI_VIEWPORT) & ULPI_RUN) && (--timeout)) ; + while ((readl(USB_ULPI_VIEWPORT) & ULPI_RUN) && (--timeout)) ; if (timeout == 0) { - printk(KERN_ERR "ulpi_write: timeout\n"); + USB_ERR("ulpi_write: timeout\n"); return -1; } @@ -344,7 +399,7 @@ static void ulpi_init(struct usb_info *ui) return; while (seq[0] >= 0) { - INFO("ulpi: write 0x%02x to 0x%02x\n", seq[0], seq[1]); + USB_INFO("ulpi: write 0x%02x to 0x%02x\n", seq[0], seq[1]); ulpi_write(ui, seq[0], seq[1]); seq += 2; } @@ -386,6 +441,7 @@ static void config_ept(struct msm_endpoint *ept) ept->head->config = cfg; ept->head->next = TERMINATE; + #if 0 if (ept->ep.maxpacket) INFO("ept #%d %s max:%d head:%p bit:%d\n", @@ -471,7 +527,7 @@ static void usb_ept_enable(struct msm_endpoint *ept, int yes, n |= CTRL_TXT_ISOCH; break; default: - pr_err("%s: unsupported ep_type %d for %s\n", + USB_ERR("%s: unsupported ep_type %d for %s\n", __func__, ep_type, ept->ep.name); break; } @@ -495,7 +551,7 @@ static void usb_ept_enable(struct msm_endpoint *ept, int yes, n |= CTRL_RXT_ISOCH; break; default: - pr_err("%s: unsupported ep_type %d for %s\n", + USB_ERR("%s: unsupported ep_type %d for %s\n", __func__, ep_type, ept->ep.name); break; } @@ -513,21 +569,55 @@ static void usb_ept_start(struct msm_endpoint *ept) { struct usb_info *ui = ept->ui; struct msm_request *req = ept->req; + int i, cnt; + unsigned n = 1 << ept->bit; BUG_ON(req->live); - /* link the hw queue head to the request's transaction item */ - ept->head->next = req->item_dma; - ept->head->info = 0; - - /* start the endpoint */ - writel(1 << ept->bit, USB_ENDPTPRIME); - - /* mark this chain of requests as live */ while (req) { req->live = 1; + /* prepare the transaction descriptor item for the hardware */ + req->item->info = + INFO_BYTES(req->req.length) | INFO_IOC | INFO_ACTIVE; + req->item->page0 = req->dma; + req->item->page1 = (req->dma + 0x1000) & 0xfffff000; + req->item->page2 = (req->dma + 0x2000) & 0xfffff000; + req->item->page3 = (req->dma + 0x3000) & 0xfffff000; + + if (req->next == NULL) { + req->item->next = TERMINATE; + break; + } + req->item->next = req->next->item_dma; req = req->next; } + /* link the hw queue head to the request's transaction item */ + ept->head->next = ept->req->item_dma; + ept->head->info = 0; + + /* during high throughput testing it is observed that + * ept stat bit is not set even thoguh all the data + * structures are updated properly and ept prime bit + * is set. 
To workaround the issue, try to check if + * ept stat bit otherwise try to re-prime the ept + */ + for (i = 0; i < 5; i++) { + writel(n, USB_ENDPTPRIME); + for (cnt = 0; cnt < 3000; cnt++) { + if (!(readl(USB_ENDPTPRIME) & n) && + (readl(USB_ENDPTSTAT) & n)) + return; + udelay(1); + } + } + + if ((readl(USB_ENDPTPRIME) & n) && !(readl(USB_ENDPTSTAT) & n)) { + USB_ERR("Unable to prime the ept%d%s\n", + ept->num, + ept->flags & EPT_FLAG_IN ? "in" : "out"); + } + + return; } int usb_ept_queue_xfer(struct msm_endpoint *ept, struct usb_request *_req) @@ -536,7 +626,6 @@ int usb_ept_queue_xfer(struct msm_endpoint *ept, struct usb_request *_req) struct msm_request *req = to_msm_request(_req); struct msm_request *last; struct usb_info *ui = ept->ui; - struct ept_queue_item *item = req->item; unsigned length = req->req.length; if (length > 0x4000) @@ -547,14 +636,14 @@ int usb_ept_queue_xfer(struct msm_endpoint *ept, struct usb_request *_req) if (req->busy) { req->req.status = -EBUSY; spin_unlock_irqrestore(&ui->lock, flags); - INFO("usb_ept_queue_xfer() tried to queue busy request\n"); + USB_INFO("usb_ept_queue_xfer() tried to queue busy request\n"); return -EBUSY; } - if (!ui->online && (ept->num != 0)) { + if (!atomic_read(&ui->online) && (ept->num != 0)) { req->req.status = -ESHUTDOWN; spin_unlock_irqrestore(&ui->lock, flags); - INFO("usb_ept_queue_xfer() called while offline\n"); + USB_INFO("usb_ept_queue_xfer() called while offline\n"); return -ESHUTDOWN; } @@ -567,14 +656,6 @@ int usb_ept_queue_xfer(struct msm_endpoint *ept, struct usb_request *_req) (ept->flags & EPT_FLAG_IN) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - /* prepare the transaction descriptor item for the hardware */ - item->next = TERMINATE; - item->info = INFO_BYTES(length) | INFO_IOC | INFO_ACTIVE; - item->page0 = req->dma; - item->page1 = (req->dma + 0x1000) & 0xfffff000; - item->page2 = (req->dma + 0x2000) & 0xfffff000; - item->page3 = (req->dma + 0x3000) & 0xfffff000; - /* Add the new request to the end of the queue */ last = ept->last; if (last) { @@ -587,8 +668,6 @@ int usb_ept_queue_xfer(struct msm_endpoint *ept, struct usb_request *_req) /* only modify the hw transaction next pointer if * that request is not live */ - if (!last->live) - last->item->next = req->item_dma; } else { /* queue was empty -- kick the hardware */ ept->req = req; @@ -609,7 +688,7 @@ static void ep0_complete(struct usb_ep *ep, struct usb_request *req) struct usb_info *ui = ept->ui; req->complete = r->gadget_complete; - r->gadget_complete = NULL; + r->gadget_complete = 0; if (req->complete) req->complete(&ui->ep0in.ep, req); } @@ -621,22 +700,16 @@ static void ep0_queue_ack_complete(struct usb_ep *ep, struct msm_endpoint *ept = to_msm_endpoint(ep); struct usb_info *ui = ept->ui; struct usb_request *req = ui->setup_req; - completion_func gadget_complete = r->gadget_complete; - - if (gadget_complete) { - r->gadget_complete = NULL; - gadget_complete(ep, req); - } /* queue up the receive of the ACK response from the host */ if (_req->status == 0 && _req->actual == _req->length) { req->length = 0; - if (ui->ep0_dir == USB_DIR_IN) + if (atomic_read(&ui->ep0_dir) == USB_DIR_IN) usb_ept_queue_xfer(&ui->ep0out, req); else usb_ept_queue_xfer(&ui->ep0in, req); _req->complete = r->gadget_complete; - r->gadget_complete = NULL; + r->gadget_complete = 0; if (_req->complete) _req->complete(&ui->ep0in.ep, _req); } else @@ -648,31 +721,32 @@ static void ep0_setup_ack_complete(struct usb_ep *ep, struct usb_request *req) struct msm_endpoint *ept = to_msm_endpoint(ep); 
struct usb_info *ui = ept->ui; unsigned int temp; + int test_mode = atomic_read(&ui->test_mode); - if (!ui->test_mode) + if (!test_mode) return; - switch (ui->test_mode) { + switch (test_mode) { case J_TEST: - pr_info("usb electrical test mode: (J)\n"); + USB_INFO("usb electrical test mode: (J)\n"); temp = readl(USB_PORTSC) & (~PORTSC_PTC); writel(temp | PORTSC_PTC_J_STATE, USB_PORTSC); break; case K_TEST: - pr_info("usb electrical test mode: (K)\n"); + USB_INFO("usb electrical test mode: (K)\n"); temp = readl(USB_PORTSC) & (~PORTSC_PTC); writel(temp | PORTSC_PTC_K_STATE, USB_PORTSC); break; case SE0_NAK_TEST: - pr_info("usb electrical test mode: (SE0-NAK)\n"); + USB_INFO("usb electrical test mode: (SE0-NAK)\n"); temp = readl(USB_PORTSC) & (~PORTSC_PTC); writel(temp | PORTSC_PTC_SE0_NAK, USB_PORTSC); break; case TST_PKT_TEST: - pr_info("usb electrical test mode: (TEST_PKT)\n"); + USB_INFO("usb electrical test mode: (TEST_PKT)\n"); temp = readl(USB_PORTSC) & (~PORTSC_PTC); writel(temp | PORTSC_PTC_TST_PKT, USB_PORTSC); break; @@ -700,7 +774,7 @@ static void ep0_setup_send(struct usb_info *ui, unsigned length) req->length = length; req->complete = ep0_queue_ack_complete; - r->gadget_complete = NULL; + r->gadget_complete = 0; usb_ept_queue_xfer(ept, req); } @@ -714,9 +788,9 @@ static void handle_setup(struct usb_info *ui) writel(EPT_RX(0), USB_ENDPTSETUPSTAT); if (ctl.bRequestType & USB_DIR_IN) - ui->ep0_dir = USB_DIR_IN; + atomic_set(&ui->ep0_dir, USB_DIR_IN); else - ui->ep0_dir = USB_DIR_OUT; + atomic_set(&ui->ep0_dir, USB_DIR_OUT); /* any pending ep0 transactions must be canceled */ flush_endpoint(&ui->ep0out); @@ -757,9 +831,8 @@ static void handle_setup(struct usb_info *ui) { u16 temp = 0; - temp = 1 << USB_DEVICE_SELF_POWERED; - temp |= (ui->remote_wakeup << - USB_DEVICE_REMOTE_WAKEUP); + temp |= (atomic_read(&ui->remote_wakeup) + << USB_DEVICE_REMOTE_WAKEUP); memcpy(req->buf, &temp, 2); break; } @@ -797,9 +870,11 @@ static void handle_setup(struct usb_info *ui) } } if (ctl.bRequestType == (USB_DIR_OUT | USB_TYPE_STANDARD)) { - if (ctl.bRequest == USB_REQ_SET_CONFIGURATION) - ui->online = !!ctl.wValue; - else if (ctl.bRequest == USB_REQ_SET_ADDRESS) { + if (ctl.bRequest == USB_REQ_SET_CONFIGURATION) { + atomic_set(&ui->online, !!ctl.wValue); + ui->flags |= USB_FLAG_CONFIGURED; + queue_work(ui->usb_wq, &ui->work); + } else if (ctl.bRequest == USB_REQ_SET_ADDRESS) { /* write address delayed (will take effect ** after the next IN txn) */ @@ -812,18 +887,22 @@ static void handle_setup(struct usb_info *ui) case J_TEST: case K_TEST: case SE0_NAK_TEST: + if (!atomic_read(&ui->test_mode)) { + disable_charger = 1; + queue_delayed_work(ui->usb_wq, &ui->chg_work, 0); + } case TST_PKT_TEST: - ui->test_mode = ctl.wIndex; + atomic_set(&ui->test_mode, ctl.wIndex); goto ack; } goto stall; case USB_DEVICE_REMOTE_WAKEUP: - ui->remote_wakeup = 1; + atomic_set(&ui->remote_wakeup, 1); goto ack; } } else if ((ctl.bRequest == USB_REQ_CLEAR_FEATURE) && (ctl.wValue == USB_DEVICE_REMOTE_WAKEUP)) { - ui->remote_wakeup = 0; + atomic_set(&ui->remote_wakeup, 0); goto ack; } } @@ -860,8 +939,6 @@ static void handle_endpoint(struct usb_info *ui, unsigned bit) /* expire all requests that are no longer active */ spin_lock_irqsave(&ui->lock, flags); while ((req = ept->req)) { - info = req->item->info; - /* if we've processed all live requests, time to * restart the hardware on the next non-live request */ @@ -870,6 +947,9 @@ static void handle_endpoint(struct usb_info *ui, unsigned bit) break; } + /* clean speculative 
fetches on req->item->info */ + dma_coherent_post_ops(); + info = req->item->info; /* if the transaction is still in-flight, stop here */ if (info & INFO_ACTIVE) break; @@ -887,7 +967,7 @@ static void handle_endpoint(struct usb_info *ui, unsigned bit) /* XXX pass on more specific error code */ req->req.status = -EIO; req->req.actual = 0; - INFO("msm72k_udc: ept %d %s error. info=%08x\n", + USB_INFO("msm72k_udc: ept %d %s error. info=%08x\n", ept->num, (ept->flags & EPT_FLAG_IN) ? "in" : "out", info); @@ -900,8 +980,7 @@ static void handle_endpoint(struct usb_info *ui, unsigned bit) req->live = 0; if (req->dead) do_free_req(ui, req); - - if (req->req.complete) { + else if (req->req.complete) { spin_unlock_irqrestore(&ui->lock, flags); req->req.complete(&ept->ep, &req->req); spin_lock_irqsave(&ui->lock, flags); @@ -942,7 +1021,7 @@ static void flush_endpoint_hw(struct usb_info *ui, unsigned bits) } err: - pr_warning("%s: Could not complete flush! NOT GOOD! " + USB_WARNING("%s: Could not complete flush! NOT GOOD! " "stat: %x unflushed: %x bits: %x\n", __func__, stat, unflushed, bits); done: @@ -952,7 +1031,7 @@ done: static void flush_endpoint_sw(struct msm_endpoint *ept) { struct usb_info *ui = ept->ui; - struct msm_request *req; + struct msm_request *req, *next_req = NULL; unsigned long flags; /* inactive endpoints have nothing to do here */ @@ -963,6 +1042,9 @@ static void flush_endpoint_sw(struct msm_endpoint *ept) ept->head->info = 0; ept->head->next = TERMINATE; + /* flush buffers before priming ept */ + dma_coherent_pre_ops(); + /* cancel any pending requests */ spin_lock_irqsave(&ui->lock, flags); req = ept->req; @@ -973,6 +1055,13 @@ static void flush_endpoint_sw(struct msm_endpoint *ept) req->live = 0; req->req.status = -ECONNRESET; req->req.actual = 0; + + /* + * Gadget driver may free the request in completion + * handler. So keep a copy of next req pointer + * before calling completion handler. 
+ */ + next_req = req->next; if (req->req.complete) { spin_unlock_irqrestore(&ui->lock, flags); req->req.complete(&ept->ep, &req->req); @@ -980,7 +1069,7 @@ static void flush_endpoint_sw(struct msm_endpoint *ept) } if (req->dead) do_free_req(ui, req); - req = req->next; + req = next_req; } spin_unlock_irqrestore(&ui->lock, flags); } @@ -991,78 +1080,79 @@ static void flush_endpoint(struct msm_endpoint *ept) flush_endpoint_sw(ept); } -static void flush_all_endpoints(struct usb_info *ui) +static void handle_notify_offline(struct usb_info *ui) { - unsigned n; - - flush_endpoint_hw(ui, 0xffffffff); - - for (n = 0; n < 32; n++) - flush_endpoint_sw(ui->ept + n); + if (ui->driver) { + USB_INFO("%s: notify offline\n", __func__); + ui->driver->disconnect(&ui->gadget); + } + /* cancel pending ep0 transactions */ + flush_endpoint(&ui->ep0out); + flush_endpoint(&ui->ep0in); } - static irqreturn_t usb_interrupt(int irq, void *data) { struct usb_info *ui = data; unsigned n; + unsigned long flags; n = readl(USB_USBSTS); writel(n, USB_USBSTS); /* somehow we got an IRQ while in the reset sequence: ignore it */ - if (ui->running == 0) + if (!atomic_read(&ui->running)) return IRQ_HANDLED; if (n & STS_PCI) { switch (readl(USB_PORTSC) & PORTSC_PSPD_MASK) { case PORTSC_PSPD_FS: - INFO("usb: portchange USB_SPEED_FULL\n"); + USB_INFO("portchange USB_SPEED_FULL\n"); + spin_lock_irqsave(&ui->lock, flags); ui->gadget.speed = USB_SPEED_FULL; + spin_unlock_irqrestore(&ui->lock, flags); break; case PORTSC_PSPD_LS: - INFO("usb: portchange USB_SPEED_LOW\n"); + USB_INFO("portchange USB_SPEED_LOW\n"); + spin_lock_irqsave(&ui->lock, flags); ui->gadget.speed = USB_SPEED_LOW; + spin_unlock_irqrestore(&ui->lock, flags); break; case PORTSC_PSPD_HS: - INFO("usb: portchange USB_SPEED_HIGH\n"); + USB_INFO("portchange USB_SPEED_HIGH\n"); + spin_lock_irqsave(&ui->lock, flags); ui->gadget.speed = USB_SPEED_HIGH; + spin_unlock_irqrestore(&ui->lock, flags); break; } } if (n & STS_URI) { - INFO("usb: reset\n"); + USB_INFO("reset\n"); writel(readl(USB_ENDPTSETUPSTAT), USB_ENDPTSETUPSTAT); writel(readl(USB_ENDPTCOMPLETE), USB_ENDPTCOMPLETE); writel(0xffffffff, USB_ENDPTFLUSH); writel(0, USB_ENDPTCTRL(1)); - if (ui->online != 0) { + if (atomic_read(&ui->online)) { /* marking us offline will cause ept queue attempts ** to fail */ - ui->online = 0; + atomic_set(&ui->online, 0); - flush_all_endpoints(ui); - - /* XXX: we can't seem to detect going offline, - * XXX: so deconfigure on reset for the time being - */ - if (ui->driver) { - printk(KERN_INFO "usb: notify offline\n"); - ui->driver->disconnect(&ui->gadget); - } + handle_notify_offline(ui); } if (ui->connect_type != CONNECT_TYPE_USB) { ui->connect_type = CONNECT_TYPE_USB; queue_work(ui->usb_wq, &ui->notifier_work); + ui->ac_detect_count = 0; + del_timer_sync(&ui->ac_detect_timer); } } if (n & STS_SLI) - INFO("usb: suspend\n"); + USB_INFO("suspend\n"); if (n & STS_UI) { n = readl(USB_ENDPTSETUPSTAT); @@ -1080,6 +1170,14 @@ static irqreturn_t usb_interrupt(int irq, void *data) return IRQ_HANDLED; } +int usb_is_connect_type_ready(void) +{ + if (!the_usb_info) + return 0; + return the_usb_info->connect_type_ready; +} +EXPORT_SYMBOL(usb_is_connect_type_ready); + int usb_get_connect_type(void) { if (!the_usb_info) @@ -1134,7 +1232,7 @@ static ssize_t store_usb_function_switch(struct device *dev, return 0; } -static DEVICE_ATTR(usb_function_switch, 0666, +static DEVICE_ATTR(usb_function_switch, 0664, show_usb_function_switch, store_usb_function_switch); static ssize_t 
show_usb_serial_number(struct device *dev, @@ -1151,12 +1249,12 @@ static ssize_t store_usb_serial_number(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct msm_hsusb_platform_data *pdata = dev->platform_data; + char *serialno = "000000000000"; if (buf[0] == '0' || buf[0] == '1') { memset(mfg_df_serialno, 0x0, sizeof(mfg_df_serialno)); if (buf[0] == '0') { - strncpy(mfg_df_serialno, "000000000000", - strlen("000000000000")); + strncpy(mfg_df_serialno, serialno, strlen(serialno)); use_mfg_serialno = 1; android_set_serialno(mfg_df_serialno); } else { @@ -1207,7 +1305,7 @@ static ssize_t store_dummy_usb_serial_number(struct device *dev, if (buf[loop_i] == 0x0A) /* Line Feed */ continue; else { - printk(KERN_WARNING "%s(): get invaild char (0x%2.2X)\n", + USB_WARNING("%s(): get invaild char (0x%2.2X)\n", __func__, buf[loop_i]); return -EINVAL; } @@ -1226,9 +1324,37 @@ static ssize_t store_dummy_usb_serial_number(struct device *dev, static DEVICE_ATTR(dummy_usb_serial_number, 0644, show_dummy_usb_serial_number, store_dummy_usb_serial_number); +static void usb_lpm_enter(struct usb_info *ui); +static void usb_lpm_exit(struct usb_info *ui); + static ssize_t show_USB_ID_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + int value = 1; + unsigned length; +#if defined(CONFIG_CABLE_DETECT_ACCESSORY) + value = cable_get_usb_id_level(); +#else + struct usb_info *ui = the_usb_info; + + if (!ui) + return 0; + if (ui->usb_id_pin_gpio != 0) { + value = gpio_get_value(ui->usb_id_pin_gpio); + USB_INFO("id pin status %d\n", value); + } +#endif + length = sprintf(buf, "%d", value); + return length; +} + +static DEVICE_ATTR(USB_ID_status, 0444, + show_USB_ID_status, NULL); + +static ssize_t show_usb_car_kit_enable(struct device *dev, + struct device_attribute *attr, + char *buf) { struct usb_info *ui = the_usb_info; int value = 1; @@ -1236,16 +1362,16 @@ static ssize_t show_USB_ID_status(struct device *dev, if (!ui) return 0; - if (ui->usb_id_pin_gpio != 0) { - value = gpio_get_value(ui->usb_id_pin_gpio); - printk(KERN_INFO "usb: id pin status %d\n", value); + if (ui->accessory_detect == 0) { + value = 0; } + USB_INFO("USB_car_kit_enable %d\n", ui->accessory_detect); length = sprintf(buf, "%d", value); return length; } -static DEVICE_ATTR(USB_ID_status, 0444, - show_USB_ID_status, NULL); +static DEVICE_ATTR(usb_car_kit_enable, 0444, + show_usb_car_kit_enable, NULL);/*for kar kit AP check if car kit enable*/ static ssize_t show_usb_phy_setting(struct device *dev, struct device_attribute *attr, char *buf) @@ -1273,20 +1399,20 @@ static ssize_t store_usb_phy_setting(struct device *dev, unsigned value; int i; - printk(KERN_INFO "%s\n", buf); + USB_INFO("%s\n", buf); for (i = 0; i < 2; i++) token[i] = strsep((char **)&buf, " "); reg = simple_strtoul(token[0], NULL, 16); value = simple_strtoul(token[1], NULL, 16); - printk(KERN_INFO "Set 0x%x = 0x%x\n", reg, value); + USB_INFO("Set 0x%x = 0x%x\n", reg, value); ulpi_write(ui, value, reg); return 0; } -static DEVICE_ATTR(usb_phy_setting, 0666, +static DEVICE_ATTR(usb_phy_setting, 0664, show_usb_phy_setting, store_usb_phy_setting); #ifdef CONFIG_USB_ACCESSORY_DETECT @@ -1297,8 +1423,7 @@ static ssize_t show_mfg_carkit_enable(struct device *dev, struct usb_info *ui = the_usb_info; length = sprintf(buf, "%d", ui->mfg_usb_carkit_enable); - printk(KERN_INFO "%s: %d\n", __func__, - ui->mfg_usb_carkit_enable); + USB_INFO("%s: %d\n", __func__, ui->mfg_usb_carkit_enable); return length; } @@ -1310,29 +1435,33 @@ 
static ssize_t store_mfg_carkit_enable(struct device *dev, unsigned char uc; if (buf[0] != '0' && buf[0] != '1') { - printk(KERN_ERR "Can't enable/disable carkit\n"); + USB_ERR("Can't enable/disable carkit\n"); return -EINVAL; } uc = buf[0] - '0'; - printk(KERN_INFO "%s: %d\n", __func__, uc); + USB_INFO("%s: %d\n", __func__, uc); ui->mfg_usb_carkit_enable = uc; if (uc == 1 && ui->accessory_type == 1 && board_mfg_mode() == 1) { switch_set_state(&dock_switch, DOCK_STATE_CAR); - printk(KERN_INFO "carkit: set state %d\n", DOCK_STATE_CAR); + USB_INFO("carkit: set state %d\n", DOCK_STATE_CAR); } return count; } static DEVICE_ATTR(usb_mfg_carkit_enable, 0644, show_mfg_carkit_enable, store_mfg_carkit_enable); +#endif +#if defined (CONFIG_DOCK_ACCESSORY_DETECT) || defined(CONFIG_USB_ACCESSORY_DETECT) static ssize_t dock_status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_info *ui = the_usb_info; if (ui->accessory_type == 1) return sprintf(buf, "online\n"); + else if (ui->accessory_type == 3) /*desk dock*/ + return sprintf(buf, "online\n"); else return sprintf(buf, "offline\n"); } @@ -1362,52 +1491,76 @@ static void usb_prepare(struct usb_info *ui) ui->usb_wq = create_singlethread_workqueue("msm_hsusb"); if (ui->usb_wq == 0) { - printk(KERN_ERR "usb: fail to create workqueue\n"); + USB_ERR("fail to create workqueue\n"); return; } INIT_WORK(&ui->work, usb_do_work); #ifdef CONFIG_USB_ACCESSORY_DETECT INIT_WORK(&ui->detect_work, accessory_detect_work); #endif +#ifdef CONFIG_DOCK_ACCESSORY_DETECT + if (ui->dock_detect) { + INIT_DELAYED_WORK(&ui->dock_work_isr, dock_isr_work); + INIT_DELAYED_WORK(&ui->dock_work, dock_detect_work); + dock_detect_init(ui); + } +#endif + INIT_WORK(&ui->notifier_work, send_usb_connect_notify); INIT_DELAYED_WORK(&ui->chg_work, check_charger); + if (ui->usb_hub_enable) + INIT_WORK(&ui->usb_hub_work, do_usb_hub_disable); + ret = device_create_file(&ui->pdev->dev, &dev_attr_usb_cable_connect); if (ret != 0) - printk(KERN_WARNING "dev_attr_usb_cable_connect failed\n"); + USB_WARNING("dev_attr_usb_cable_connect failed\n"); ret = device_create_file(&ui->pdev->dev, &dev_attr_usb_function_switch); if (ret != 0) - printk(KERN_WARNING "dev_attr_usb_function_switch failed\n"); + USB_WARNING("dev_attr_usb_function_switch failed\n"); ret = device_create_file(&ui->pdev->dev, &dev_attr_usb_serial_number); if (ret != 0) - printk(KERN_WARNING "dev_attr_usb_serial_number failed\n"); + USB_WARNING("dev_attr_usb_serial_number failed\n"); ret = device_create_file(&ui->pdev->dev, &dev_attr_dummy_usb_serial_number); if (ret != 0) - printk(KERN_WARNING "dev_attr_dummy_usb_serial_number failed\n"); + USB_WARNING("dev_attr_dummy_usb_serial_number failed\n"); ret = device_create_file(&ui->pdev->dev, &dev_attr_USB_ID_status); if (ret != 0) - printk(KERN_WARNING "dev_attr_USB_ID_status failed\n"); + USB_WARNING("dev_attr_USB_ID_status failed\n"); ret = device_create_file(&ui->pdev->dev, &dev_attr_usb_phy_setting); if (ret != 0) - printk(KERN_WARNING "dev_attr_usb_phy_setting failed\n"); + USB_WARNING("dev_attr_usb_phy_setting failed\n"); #ifdef CONFIG_USB_ACCESSORY_DETECT ret = device_create_file(&ui->pdev->dev, &dev_attr_usb_mfg_carkit_enable); if (ret != 0) - printk(KERN_WARNING "dev_attr_usb_mfg_carkit_enable failed\n"); + USB_WARNING("dev_attr_usb_mfg_carkit_enable failed\n"); #endif + ret = device_create_file(&ui->pdev->dev, + &dev_attr_usb_car_kit_enable);/*for kar kit AP check if car kit enable*/ + if (ret != 0) + USB_WARNING("dev_attr_usb_car_kit_enable 
failed\n"); + + ui->sdev.name = driver_name; + ui->sdev.print_name = print_switch_name; + ui->sdev.print_state = print_switch_state; + + ret = switch_dev_register(&ui->sdev); + if (ret != 0) + USB_WARNING("switch class can't be registered\n"); + } static int usb_wakeup_phy(struct usb_info *ui) @@ -1423,7 +1576,7 @@ static int usb_wakeup_phy(struct usb_info *ui) } if ((readl(USB_PORTSC) & PORTSC_PHCD)) { - pr_err("%s: cannot clear phcd bit\n", __func__); + USB_ERR("%s: cannot clear phcd bit\n", __func__); return -1; } @@ -1432,7 +1585,7 @@ static int usb_wakeup_phy(struct usb_info *ui) static void usb_suspend_phy(struct usb_info *ui) { - printk(KERN_INFO "%s\n", __func__); + USB_INFO("%s\n", __func__); #ifdef CONFIG_ARCH_MSM7X00A /* disable unused interrupt */ ulpi_write(ui, 0x01, 0x0d); @@ -1459,18 +1612,15 @@ static void usb_suspend_phy(struct usb_info *ui) writel(readl(USB_PORTSC) | PORTSC_PHCD, USB_PORTSC); mdelay(1); if (!(readl(USB_PORTSC) & PORTSC_PHCD)) - printk(KERN_INFO "%s: unable to set lpm\n", __func__); + USB_INFO("%s: unable to set lpm\n", __func__); #endif } static void usb_reset(struct usb_info *ui) { - unsigned long flags; - printk(KERN_INFO "hsusb: reset controller\n"); + USB_INFO("hsusb: reset controller\n"); - spin_lock_irqsave(&ui->lock, flags); - ui->running = 0; - spin_unlock_irqrestore(&ui->lock, flags); + atomic_set(&ui->running, 0); /* disable usb interrupts */ writel(0, USB_USBINTR); @@ -1523,15 +1673,9 @@ static void usb_reset(struct usb_info *ui) configure_endpoints(ui); /* marking us offline will cause ept queue attempts to fail */ - ui->online = 0; + atomic_set(&ui->online, 0); - /* terminate any pending transactions */ - flush_all_endpoints(ui); - - if (ui->driver) { - printk(KERN_INFO "usb: notify offline\n"); - ui->driver->disconnect(&ui->gadget); - } + handle_notify_offline(ui); /* enable interrupts */ writel(STS_URI | STS_SLI | STS_UI | STS_PCI, USB_USBINTR); @@ -1539,9 +1683,7 @@ static void usb_reset(struct usb_info *ui) /* go to RUN mode (D+ pullup enable) */ msm72k_pullup(&ui->gadget, 1); - spin_lock_irqsave(&ui->lock, flags); - ui->running = 1; - spin_unlock_irqrestore(&ui->lock, flags); + atomic_set(&ui->running, 1); } static void usb_start(struct usb_info *ui) @@ -1550,13 +1692,28 @@ static void usb_start(struct usb_info *ui) spin_lock_irqsave(&ui->lock, flags); ui->flags |= USB_FLAG_START; +/*if msm_hsusb_set_vbus_state set 1, but usb did not init, the ui =NULL, */ +/*it would cause reboot with usb, it did not swith to USB and ADB fail*/ +/*So when USB start, check again*/ +#ifndef CONFIG_ARCH_MSM8X60 + if (vbus) { + ui->flags |= USB_FLAG_VBUS_ONLINE; + if (ui->change_phy_voltage) + ui->change_phy_voltage(1); + } else { + ui->flags |= USB_FLAG_VBUS_OFFLINE; + } + /* online->switch to USB, offline->switch to uart */ + if (ui->usb_uart_switch) + ui->usb_uart_switch(!vbus); +#endif queue_work(ui->usb_wq, &ui->work); spin_unlock_irqrestore(&ui->lock, flags); } static int usb_free(struct usb_info *ui, int ret) { - INFO("usb_free(%d)\n", ret); + USB_INFO("%s(%d)\n", __func__, ret); if (ui->irq) free_irq(ui->irq, 0); @@ -1587,7 +1744,7 @@ static void usb_do_work_check_vbus(struct usb_info *ui) spin_lock_irqsave(&ui->lock, iflags); #if defined(CONFIG_USB_BYPASS_VBUS_NOTIFY) ui->flags |= USB_FLAG_VBUS_ONLINE; - pr_info("usb: fake vbus\n"); + USB_INFO("fake vbus\n"); #else if (vbus) ui->flags |= USB_FLAG_VBUS_ONLINE; @@ -1600,9 +1757,11 @@ static void usb_do_work_check_vbus(struct usb_info *ui) static void usb_lpm_enter(struct usb_info *ui) { unsigned long 
iflags; + if (ui->in_lpm) return; - printk(KERN_INFO "usb: lpm enter\n"); + + USB_INFO("lpm enter\n"); spin_lock_irqsave(&ui->lock, iflags); usb_suspend_phy(ui); if (ui->otgclk) @@ -1613,16 +1772,34 @@ static void usb_lpm_enter(struct usb_info *ui) clk_disable(ui->coreclk); clk_set_rate(ui->ebi1clk, 0); ui->in_lpm = 1; + if (ui->pclk_src) + clk_disable(ui->pclk_src); spin_unlock_irqrestore(&ui->lock, iflags); + + if (board_mfg_mode() == 1) {/*for MFG adb unstable in FROYO ROM*/ + USB_INFO("idle_wake_unlock and perf unlock\n"); + wake_unlock(&vbus_idle_wake_lock); + if (is_perf_lock_active(&usb_perf_lock)) + perf_unlock(&usb_perf_lock); + } } static void usb_lpm_exit(struct usb_info *ui) { + unsigned long iflags; + if (!ui->in_lpm) return; - printk(KERN_INFO "usb: lpm exit\n"); - clk_set_rate(ui->ebi1clk, 128000000); + + USB_INFO("lpm exit\n"); + spin_lock_irqsave(&ui->lock, iflags); +//#ifndef CONFIG_ARCH_MSM8X60 /* FIXME */ +// clk_set_rate(ui->ebi1clk, acpuclk_get_max_axi_rate()); +//#endif udelay(10); + if (ui->pclk_src) + clk_enable(ui->pclk_src); + if (ui->coreclk) clk_enable(ui->coreclk); clk_enable(ui->clk); @@ -1631,87 +1808,232 @@ static void usb_lpm_exit(struct usb_info *ui) clk_enable(ui->otgclk); usb_wakeup_phy(ui); ui->in_lpm = 0; + spin_unlock_irqrestore(&ui->lock, iflags); + + if (board_mfg_mode() == 1) {/*for MFG adb unstable in FROYO ROM*/ + USB_INFO("idle_wake_lock and perf lock\n"); + wake_lock(&vbus_idle_wake_lock); + if (!is_perf_lock_active(&usb_perf_lock)) + perf_lock(&usb_perf_lock); + } } +static void do_usb_hub_disable(struct work_struct *w) +{ + struct usb_info *ui = container_of(w, struct usb_info, usb_hub_work); + + if (ui->usb_hub_enable) + ui->usb_hub_enable(false); +} + +#ifdef CONFIG_DOCK_ACCESSORY_DETECT +static irqreturn_t dock_interrupt(int irq, void *data) +{ + struct usb_info *ui = data; + disable_irq_nosync(ui->dockpin_irq); + cancel_delayed_work(&ui->dock_work); + queue_delayed_work(ui->usb_wq, &ui->dock_work_isr, DOCK_DET_DELAY); + return IRQ_HANDLED; +} +static void dock_isr_work(struct work_struct *w) +{ + struct usb_info *ui = container_of(w, + struct usb_info, dock_work_isr.work); + ui->dock_pin_state = gpio_get_value(ui->dock_pin_gpio); + + if (ui->dock_pin_state == 1) + set_irq_type(ui->dockpin_irq, IRQF_TRIGGER_LOW); + else + set_irq_type(ui->dockpin_irq, IRQF_TRIGGER_HIGH); + queue_delayed_work(ui->usb_wq, &ui->dock_work, DOCK_DET_DELAY); + enable_irq(ui->dockpin_irq); +} +static void dock_detect_work(struct work_struct *w) +{ + struct usb_info *ui = container_of(w, struct usb_info, dock_work.work); + int value; + + value = gpio_get_value(ui->dock_pin_gpio); + USB_INFO("%s: dock_pin = %s\n", __func__, value ? 
"high" : "low"); + if (ui->dock_pin_state != value && (ui->dock_pin_state & 0x80) == 0) { + USB_ERR("%s: dock_pin_state changed\n", __func__); + return; + } + + if (value == 0 && vbus) { + if (ui->accessory_type == 3) + return; + set_irq_type(ui->dockpin_irq, IRQF_TRIGGER_HIGH); + switch_set_state(&dock_switch, DOCK_STATE_DESK); + ui->accessory_type = 3; + USB_INFO("dock: set state %d\n", DOCK_STATE_DESK); + } else { + if (ui->accessory_type == 0) + return; + set_irq_type(ui->dockpin_irq, IRQF_TRIGGER_LOW); + switch_set_state(&dock_switch, DOCK_STATE_UNDOCKED); + ui->accessory_type = 0; + USB_INFO("dock: set state %d\n", DOCK_STATE_UNDOCKED); + } +} +static void dock_detect_init(struct usb_info *ui) +{ + int ret; + + if (ui->dock_pin_gpio == 0) + return; + if (ui->dockpin_irq == 0) + ui->dockpin_irq = gpio_to_irq(ui->dock_pin_gpio); + if (!vbus) + set_irq_flags(ui->dockpin_irq, IRQF_VALID | IRQF_NOAUTOEN); + ret = request_irq(ui->dockpin_irq, dock_interrupt, + IRQF_TRIGGER_LOW, "dock_irq", ui); + if (ret < 0) { + USB_ERR("[GPIO DOCK] %s: request_irq failed\n", __func__); + return; + } + USB_INFO("%s: dock irq %d\n", __func__, ui->dockpin_irq); + ret = set_irq_wake(ui->dockpin_irq, 1); + if (ret < 0) { + USB_ERR("[GPIO DOCK] %s: set_irq_wake failed\n", __func__); + goto err; + } + + if (switch_dev_register(&dock_switch) < 0) { + USB_ERR("[GPIO DOCK] fail to register dock switch!\n"); + goto err; + } + + ret = device_create_file(dock_switch.dev, &dev_attr_status); + if (ret != 0) + USB_WARNING("dev_attr_status failed\n"); + + return; + +err: + free_irq(ui->dockpin_irq, 0); +} +#endif + + #ifdef CONFIG_USB_ACCESSORY_DETECT static void carkit_detect(struct usb_info *ui) { unsigned n; int value; unsigned in_lpm; - msleep(100); value = gpio_get_value(ui->usb_id_pin_gpio); - printk(KERN_INFO "usb: usb ID pin = %d\n", value); + USB_INFO("%s: usb ID pin = %d\n", __func__, value); in_lpm = ui->in_lpm; if (value == 0) { - if (in_lpm) + if (in_lpm) { usb_lpm_exit(ui); + usb_reset(ui); + } n = readl(USB_OTGSC); /* ID pull-up register */ writel(n | OTGSC_IDPU, USB_OTGSC); msleep(100); - n = readl(USB_OTGSC); + n = readl(USB_OTGSC); if (n & OTGSC_ID) { - printk(KERN_INFO "usb: carkit inserted\n"); + USB_INFO("carkit inserted\n"); if ((board_mfg_mode() == 0) || (board_mfg_mode() == 1 && ui->mfg_usb_carkit_enable == 1)) { switch_set_state(&dock_switch, DOCK_STATE_CAR); - printk(KERN_INFO "carkit: set state %d\n", DOCK_STATE_CAR); + USB_INFO("carkit: set state %d\n", DOCK_STATE_CAR); } ui->accessory_type = 1; - } else - ui->accessory_type = 0; + } else { + USB_INFO("Credle inserted\n"); + switch_set_state(&dock_switch, DOCK_STATE_CREDLE); + ui->accessory_type = 5; + } if (in_lpm) usb_lpm_enter(ui); } else { if (ui->accessory_type == 1) - printk(KERN_INFO "usb: carkit removed\n"); + USB_INFO("carkit removed\n"); + else if (ui->accessory_type == 5) + USB_INFO("credle removed\n"); switch_set_state(&dock_switch, DOCK_STATE_UNDOCKED); - printk(KERN_INFO "carkit: set state %d\n", DOCK_STATE_UNDOCKED); + USB_INFO("carkit: set state %d\n", DOCK_STATE_UNDOCKED); ui->accessory_type = 0; } } #ifdef CONFIG_USB_ACCESSORY_DETECT_BY_ADC +static void mhl_detect(struct usb_info *ui) +{ + uint32_t adc_value = 0xffffffff; + + if (ui->config_usb_id_gpios) + ui->config_usb_id_gpios(1); + + htc_get_usb_accessory_adc_level(&adc_value); + USB_INFO("[2nd] accessory adc = 0x%x\n", adc_value); + + if (adc_value >= 0x5999 && adc_value <= 0x76B0) { + USB_INFO("MHL inserted\n"); + if (ui->usb_mhl_switch) + ui->usb_mhl_switch(1); + 
ui->accessory_type = 4; + } + if (ui->config_usb_id_gpios) + ui->config_usb_id_gpios(0); +} + static void accessory_detect_by_adc(struct usb_info *ui) { int value; + msleep(100); + value = gpio_get_value(ui->usb_id_pin_gpio); - printk(KERN_INFO "usb: usb ID pin = %d\n", value); + USB_INFO("%s: usb ID pin = %d\n", __func__, value); + if (value == 0) { uint32_t adc_value = 0xffffffff; htc_get_usb_accessory_adc_level(&adc_value); - printk(KERN_INFO "usb: accessory adc = 0x%x\n", adc_value); + USB_INFO("accessory adc = 0x%x\n", adc_value); if (adc_value >= 0x2112 && adc_value <= 0x3D53) { - printk(KERN_INFO "usb: headset inserted\n"); + USB_INFO("headset inserted\n"); ui->accessory_type = 2; - headset_ext_detect(USB_HEADSET); - } else if (adc_value >= 0x88A && adc_value <= 0x1E38) { - printk(KERN_INFO "usb: carkit inserted\n"); + headset_ext_detect(USB_AUDIO_OUT); + } else if (adc_value >= 0x1174 && adc_value <= 0x1E38) { + USB_INFO("carkit inserted\n"); ui->accessory_type = 1; if ((board_mfg_mode() == 0) || (board_mfg_mode() == 1 && ui->mfg_usb_carkit_enable == 1)) { switch_set_state(&dock_switch, DOCK_STATE_CAR); - printk(KERN_INFO "carkit: set state %d\n", DOCK_STATE_CAR); + USB_INFO("carkit: set state %d\n", DOCK_STATE_CAR); } + } else if (adc_value >= 0x0 && adc_value < 0x1174) { + mhl_detect(ui); } else ui->accessory_type = 0; } else { - if (ui->accessory_type == 2) { - printk(KERN_INFO "usb: headset removed\n"); - headset_ext_detect(NO_DEVICE); - } else if (ui->accessory_type == 1) { - printk(KERN_INFO "usb: carkit removed\n"); + switch (ui->accessory_type) { + case 1: + USB_INFO("carkit removed\n"); switch_set_state(&dock_switch, DOCK_STATE_UNDOCKED); + ui->accessory_type = 0; + break; + case 2: + USB_INFO("headset removed\n"); + headset_ext_detect(USB_NO_HEADSET); + ui->accessory_type = 0; + break; + case 3: + /*MHL*/ + break; + default: + break; } - ui->accessory_type = 0; } - } #endif @@ -1743,7 +2065,7 @@ static irqreturn_t usbid_interrupt(int irq, void *data) struct usb_info *ui = data; disable_irq_nosync(ui->idpin_irq); - printk(KERN_INFO "usb: id interrupt\n"); + USB_INFO("id interrupt\n"); queue_work(ui->usb_wq, &ui->detect_work); return IRQ_HANDLED; } @@ -1751,34 +2073,36 @@ static irqreturn_t usbid_interrupt(int irq, void *data) static void accessory_detect_init(struct usb_info *ui) { int ret; - printk(KERN_INFO "%s: id pin %d\n", __func__, - ui->usb_id_pin_gpio); + USB_INFO("%s: id pin %d\n", __func__, ui->usb_id_pin_gpio); if (ui->usb_id_pin_gpio == 0) return; - ui->idpin_irq = gpio_to_irq(ui->usb_id_pin_gpio); + if (ui->idpin_irq == 0) + ui->idpin_irq = gpio_to_irq(ui->usb_id_pin_gpio); ret = request_irq(ui->idpin_irq, usbid_interrupt, IRQF_TRIGGER_LOW, "car_kit_irq", ui); if (ret < 0) { - printk(KERN_ERR "%s: request_irq failed\n", __func__); + USB_ERR("%s: request_irq failed\n", __func__); return; } ret = set_irq_wake(ui->idpin_irq, 1); if (ret < 0) { - printk(KERN_ERR "%s: set_irq_wake failed\n", __func__); + USB_ERR("%s: set_irq_wake failed\n", __func__); goto err; } if (switch_dev_register(&dock_switch) < 0) { - printk(KERN_ERR "usb: fail to register dock switch!\n"); + USB_ERR(" fail to register dock switch!\n"); goto err; } + ret = device_create_file(dock_switch.dev, &dev_attr_status); if (ret != 0) - printk(KERN_WARNING "dev_attr_status failed\n"); + USB_WARNING("dev_attr_status failed\n"); + return; err: free_irq(ui->idpin_irq, 0); @@ -1787,6 +2111,47 @@ err: #endif #define DELAY_FOR_CHECK_CHG msecs_to_jiffies(300) + +static void charger_detect_by_uart(struct 
usb_info *ui) +{ + int is_china_ac; + + if (!vbus) + return; + + /*UART*/ + if (ui->usb_uart_switch) + ui->usb_uart_switch(1); + + is_china_ac = ui->china_ac_detect(); + + if (is_china_ac) { + ui->connect_type = CONNECT_TYPE_AC; + queue_work(ui->usb_wq, &ui->notifier_work); + usb_lpm_enter(ui); + USB_INFO("AC charger\n"); + } else { + ui->connect_type = CONNECT_TYPE_UNKNOWN; + queue_delayed_work(ui->usb_wq, &ui->chg_work, + DELAY_FOR_CHECK_CHG); + USB_INFO("not AC charger\n"); + + /*set uart to gpo*/ + if (ui->serial_debug_gpios) + ui->serial_debug_gpios(0); + /*turn on USB HUB*/ + if (ui->usb_hub_enable) + ui->usb_hub_enable(1); + + /*USB*/ + if (ui->usb_uart_switch) + ui->usb_uart_switch(0); + + usb_lpm_exit(ui); + usb_reset(ui); + } +} + static void charger_detect(struct usb_info *ui) { if (!vbus) @@ -1794,13 +2159,60 @@ static void charger_detect(struct usb_info *ui) msleep(10); /* detect shorted D+/D-, indicating AC power */ if ((readl(USB_PORTSC) & PORTSC_LS) != PORTSC_LS) { - printk(KERN_INFO "usb: not AC charger\n"); + USB_INFO("not AC charger\n"); ui->connect_type = CONNECT_TYPE_UNKNOWN; queue_delayed_work(ui->usb_wq, &ui->chg_work, DELAY_FOR_CHECK_CHG); + mod_timer(&ui->ac_detect_timer, jiffies + (3 * HZ)); } else { - printk(KERN_INFO "usb: AC charger\n"); - ui->connect_type = CONNECT_TYPE_AC; + if (ui->usb_id_pin_gpio != 0) { + if (gpio_get_value(ui->usb_id_pin_gpio) == 0) { + USB_INFO("9V AC charger\n"); + ui->connect_type = CONNECT_TYPE_9V_AC; + } else { + USB_INFO("AC charger\n"); + ui->connect_type = CONNECT_TYPE_AC; + } + } else { + USB_INFO("AC charger\n"); + ui->connect_type = CONNECT_TYPE_AC; + } + queue_work(ui->usb_wq, &ui->notifier_work); + writel(0x00080000, USB_USBCMD); + msleep(10); + usb_lpm_enter(ui); + if (ui->change_phy_voltage) + ui->change_phy_voltage(0); + } +} + +static void check_charger(struct work_struct *w) +{ + struct usb_info *ui = container_of(w, struct usb_info, chg_work.work); + if (disable_charger) { + USB_INFO("disable charger\n"); + if (ui->disable_usb_charger) + ui->disable_usb_charger(); + disable_charger = 0; + return; + } + /* unknown charger */ + if (vbus && ui->connect_type == CONNECT_TYPE_UNKNOWN) + queue_work(ui->usb_wq, &ui->notifier_work); +} + +static void charger_detect_by_9v_gpio(struct usb_info *ui) +{ + if (!vbus) + return; + msleep(10); + if (gpio_get_value(ui->ac_9v_gpio) == 0) { + printk(KERN_INFO "not 9V AC charger\n"); + ui->connect_type = CONNECT_TYPE_UNKNOWN; + } else { + printk(KERN_INFO "9V AC charger\n"); + ui->connect_type = CONNECT_TYPE_9V_AC; + queue_work(ui->usb_wq, &ui->notifier_work); writel(0x00080000, USB_USBCMD); msleep(10); @@ -1808,13 +2220,6 @@ static void charger_detect(struct usb_info *ui) } } -static void check_charger(struct work_struct *w) -{ - struct usb_info *ui = container_of(w, struct usb_info, chg_work.work); - /* unknown charger */ - if (vbus && ui->connect_type == CONNECT_TYPE_UNKNOWN) - queue_work(ui->usb_wq, &ui->notifier_work); -} static void usb_do_work(struct work_struct *w) { @@ -1832,13 +2237,23 @@ static void usb_do_work(struct work_struct *w) /* give up if we have nothing to do */ if (flags == 0) break; + switch (ui->state) { case USB_STATE_IDLE: if (flags & USB_FLAG_START) { - pr_info("hsusb: IDLE -> ONLINE\n"); + USB_INFO("hsusb: IDLE -> ONLINE\n"); + usb_lpm_exit(ui); usb_reset(ui); - charger_detect(ui); + if (ui->china_ac_detect) + charger_detect_by_uart(ui); + else if (ui->ac_9v_gpio) + charger_detect_by_9v_gpio(ui); + else { + if (ui->usb_id_pin_gpio != 0) + msleep(200); + 
charger_detect(ui); + } ui->state = USB_STATE_ONLINE; #ifdef CONFIG_USB_ACCESSORY_DETECT @@ -1853,12 +2268,12 @@ static void usb_do_work(struct work_struct *w) * the signal to go offline, we must honor it */ if (flags & USB_FLAG_VBUS_OFFLINE) { - pr_info("hsusb: ONLINE -> OFFLINE\n"); + USB_INFO("hsusb: ONLINE -> OFFLINE\n"); + atomic_set(&ui->running, 0); + atomic_set(&ui->online, 0); /* synchronize with irq context */ spin_lock_irqsave(&ui->lock, iflags); - ui->running = 0; - ui->online = 0; writel(0x00080000, USB_USBCMD); spin_unlock_irqrestore(&ui->lock, iflags); @@ -1871,36 +2286,41 @@ static void usb_do_work(struct work_struct *w) msleep(5); } - /* terminate any transactions, etc */ - flush_all_endpoints(ui); - - if (ui->driver) { - printk(KERN_INFO "usb: notify offline\n"); - ui->driver->disconnect(&ui->gadget); - } + handle_notify_offline(ui); if (ui->phy_reset) ui->phy_reset(); /* power down phy, clock down usb */ usb_lpm_enter(ui); + ui->ac_detect_count = 0; + del_timer_sync(&ui->ac_detect_timer); + switch_set_state(&ui->sdev, 0); ui->state = USB_STATE_OFFLINE; usb_do_work_check_vbus(ui); break; } + + if (flags & USB_FLAG_CONFIGURED) { + switch_set_state(&ui->sdev, atomic_read(&ui->online)); + break; + } + if (flags & USB_FLAG_RESET) { - pr_info("hsusb: ONLINE -> RESET\n"); + USB_INFO("hsusb: ONLINE -> RESET\n"); if (ui->connect_type == CONNECT_TYPE_AC) { - pr_info("hsusb: RESET -> ONLINE\n"); + USB_INFO("hsusb: RESET -> ONLINE\n"); break; } + + atomic_set(&ui->online, 0); spin_lock_irqsave(&ui->lock, iflags); - ui->online = 0; msm72k_pullup(&ui->gadget, 0); spin_unlock_irqrestore(&ui->lock, iflags); usb_reset(ui); - pr_info("hsusb: RESET -> ONLINE\n"); + switch_set_state(&ui->sdev, 0); + USB_INFO("hsusb: RESET -> ONLINE\n"); break; } break; @@ -1909,10 +2329,19 @@ static void usb_do_work(struct work_struct *w) * present when we received the signal, go online. 
*/ if ((flags & USB_FLAG_VBUS_ONLINE) && _vbus) { - pr_info("hsusb: OFFLINE -> ONLINE\n"); - usb_lpm_exit(ui); - usb_reset(ui); - charger_detect(ui); + USB_INFO("hsusb: OFFLINE -> ONLINE\n"); + + if (ui->china_ac_detect) + charger_detect_by_uart(ui); + else if (ui->ac_9v_gpio) { + usb_lpm_exit(ui); + usb_reset(ui); + charger_detect_by_9v_gpio(ui); + } else { + usb_lpm_exit(ui); + usb_reset(ui); + charger_detect(ui); + } ui->state = USB_STATE_ONLINE; usb_do_work_check_vbus(ui); @@ -1930,7 +2359,7 @@ void msm_hsusb_set_vbus_state(int online) { unsigned long flags = 0; struct usb_info *ui = the_usb_info; - printk(KERN_INFO "%s: %d\n", __func__, online); + USB_INFO("%s: %d\n", __func__, online); if (ui) spin_lock_irqsave(&ui->lock, flags); @@ -1942,14 +2371,61 @@ void msm_hsusb_set_vbus_state(int online) } else { ui->flags |= USB_FLAG_VBUS_OFFLINE; } - /* online->switch to USB, offline->switch to uart */ - if (ui->usb_uart_switch) - ui->usb_uart_switch(!online); + + if (ui->change_phy_voltage) + ui->change_phy_voltage(online); + + if (online) { + /*USB*/ + if (ui->usb_uart_switch) + ui->usb_uart_switch(0); + } else { + /*turn off USB HUB*/ + if (ui->usb_hub_enable) + queue_work(ui->usb_wq, &ui->usb_hub_work); + + /*UART*/ + if (ui->usb_uart_switch) + ui->usb_uart_switch(1); + /*configure uart pin to alternate function*/ + if (ui->serial_debug_gpios) + ui->serial_debug_gpios(1); + + /*path should be switched to usb after mhl cable is removed*/ + if (ui->usb_mhl_switch && ui->accessory_type == 4) { + USB_INFO("MHL removed\n"); + ui->usb_mhl_switch(0); + ui->accessory_type = 0; + } + } + queue_work(ui->usb_wq, &ui->work); } } - if (ui) + if (ui) { spin_unlock_irqrestore(&ui->lock, flags); +#ifdef CONFIG_DOCK_ACCESSORY_DETECT + if (ui->dock_detect) { + if (vbus) + enable_irq(ui->dockpin_irq); + else { + disable_irq_nosync(ui->dockpin_irq); + if (cancel_delayed_work_sync(&ui->dock_work_isr)) + enable_irq(ui->dockpin_irq); + + if (cancel_delayed_work_sync(&ui->dock_work)) { + if (ui->dock_pin_state == 0) + set_irq_type(ui->dockpin_irq, + IRQF_TRIGGER_LOW); + } + if (ui->accessory_type == 3) { + ui->dock_pin_state |= 0x80; + queue_delayed_work(ui->usb_wq, &ui->dock_work, 0); + } + } + } +#endif + } } #if defined(CONFIG_DEBUG_FS) && 0 @@ -2100,6 +2576,7 @@ static int msm72k_disable(struct usb_ep *_ep) struct msm_endpoint *ept = to_msm_endpoint(_ep); usb_ept_enable(ept, 0, 0); + flush_endpoint(ept); return 0; } @@ -2140,7 +2617,7 @@ msm72k_queue(struct usb_ep *_ep, struct usb_request *req, gfp_t gfp_flags) if (!req->length) goto ep_queue_done; else { - if (ui->ep0_dir == USB_DIR_OUT) { + if (atomic_read(&ui->ep0_dir) == USB_DIR_OUT) { ep = &ui->ep0out; ep->ep.driver_data = ui->ep0in.ep.driver_data; } @@ -2287,10 +2764,10 @@ static int msm72k_pullup(struct usb_gadget *_gadget, int is_active) /* disable/enable D+ pullup */ if (is_active) { - pr_info("msm_hsusb: enable pullup\n"); + USB_INFO("msm_hsusb: enable pullup\n"); writel(cmd | 1, USB_USBCMD); } else { - pr_info("msm_hsusb: disable pullup\n"); + USB_INFO("msm_hsusb: disable pullup\n"); writel(cmd, USB_USBCMD); #ifndef CONFIG_ARCH_MSM7X00A @@ -2306,19 +2783,19 @@ static int msm72k_wakeup(struct usb_gadget *_gadget) struct usb_info *ui = container_of(_gadget, struct usb_info, gadget); unsigned long flags; - if (!ui->remote_wakeup) { - pr_err("%s: remote wakeup not supported\n", __func__); + if (!atomic_read(&ui->remote_wakeup)) { + USB_ERR("%s: remote wakeup not supported\n", __func__); return -ENOTSUPP; } - if (!ui->online) { - pr_err("%s: device is 
not configured\n", __func__); + if (!atomic_read(&ui->online)) { + USB_ERR("%s: device is not configured\n", __func__); return -ENODEV; } spin_lock_irqsave(&ui->lock, flags); if ((readl(USB_PORTSC) & PORTSC_SUSP) == PORTSC_SUSP) { - pr_info("%s: enabling force resume\n", __func__); + USB_INFO("%s: enabling force resume\n", __func__); writel(readl(USB_PORTSC) | PORTSC_FPR, USB_PORTSC); } spin_unlock_irqrestore(&ui->lock, flags); @@ -2344,14 +2821,71 @@ static ssize_t usb_remote_wakeup(struct device *dev, } static DEVICE_ATTR(wakeup, S_IWUSR, 0, usb_remote_wakeup); +static void ac_detect_expired(unsigned long _data) +{ + struct usb_info *ui = (struct usb_info *) _data; + u32 delay = 0; + + USB_INFO("%s: count = %d, connect_type = 0x%04x\n", __func__, + ui->ac_detect_count, ui->connect_type); + + if (ui->connect_type == CONNECT_TYPE_USB || ui->ac_detect_count >= 3) + return; + + /* detect shorted D+/D-, indicating AC power */ + if ((readl(USB_PORTSC) & PORTSC_LS) != PORTSC_LS) { + + /* Some carkit can't be recognized as AC mode. + * Add SW solution here to notify battery driver should + * work as AC charger when car mode activated. + */ + if (ui->accessory_type == 1) { + USB_INFO("car mode charger\n"); + ui->connect_type = CONNECT_TYPE_AC; + queue_work(ui->usb_wq, &ui->notifier_work); + writel(0x00080000, USB_USBCMD); + mdelay(10); + usb_lpm_enter(ui); + return; + } + + ui->ac_detect_count++; + /* detect delay: 3 sec, 5 sec, 10 sec */ + if (ui->ac_detect_count == 1) + delay = 5 * HZ; + else if (ui->ac_detect_count == 2) + delay = 10 * HZ; + + mod_timer(&ui->ac_detect_timer, jiffies + delay); + } else { + if (ui->usb_id_pin_gpio != 0) { + if (gpio_get_value(ui->usb_id_pin_gpio) == 0) { + USB_INFO("9V AC charger\n"); + ui->connect_type = CONNECT_TYPE_9V_AC; + } else { + USB_INFO("AC charger\n"); + ui->connect_type = CONNECT_TYPE_AC; + } + } else { + USB_INFO("AC charger\n"); + ui->connect_type = CONNECT_TYPE_AC; + } + queue_work(ui->usb_wq, &ui->notifier_work); + writel(0x00080000, USB_USBCMD); + mdelay(10); + usb_lpm_enter(ui); + } +} + static int msm72k_probe(struct platform_device *pdev) { struct resource *res; struct usb_info *ui; int irq; int ret; + char *serialno = "000000000000"; - INFO("msm72k_probe\n"); + USB_INFO("msm72k_probe\n"); ui = kzalloc(sizeof(struct usb_info), GFP_KERNEL); if (!ui) return -ENOMEM; @@ -2365,13 +2899,33 @@ static int msm72k_probe(struct platform_device *pdev) ui->phy_init_seq = pdata->phy_init_seq; ui->usb_connected = pdata->usb_connected; ui->usb_uart_switch = pdata->usb_uart_switch; + ui->serial_debug_gpios = pdata->serial_debug_gpios; + ui->usb_hub_enable = pdata->usb_hub_enable; +// ui->china_ac_detect = pdata->china_ac_detect; // DISABLED + ui->disable_usb_charger = pdata->disable_usb_charger; + ui->change_phy_voltage = pdata->change_phy_voltage; + ui->ldo_init = pdata->ldo_init; + ui->ldo_enable = pdata->ldo_enable; + ui->usb_mhl_switch = pdata->usb_mhl_switch; + ui->ac_9v_gpio = pdata->ac_9v_gpio; + + if (ui->ldo_init) + ui->ldo_init(1); + + if (ui->ldo_enable) + ui->ldo_enable(1); ui->accessory_detect = pdata->accessory_detect; - printk(KERN_INFO "usb: accessory detect %d\n", - ui->accessory_detect); + USB_INFO("accessory detect %d\n", ui->accessory_detect); ui->usb_id_pin_gpio = pdata->usb_id_pin_gpio; - printk(KERN_INFO "usb: id_pin_gpio %d\n", - pdata->usb_id_pin_gpio); + USB_INFO("id_pin_gpio %d\n", pdata->usb_id_pin_gpio); + + ui->dock_detect = pdata->dock_detect; + USB_INFO("dock detect %d\n", ui->dock_detect); + ui->dock_pin_gpio = 
pdata->dock_pin_gpio; + USB_INFO("dock pin gpio %d\n", ui->dock_pin_gpio); + + ui->idpin_irq = pdata->id_pin_irq; if (pdata->config_usb_id_gpios) ui->config_usb_id_gpios = pdata->config_usb_id_gpios; } @@ -2393,7 +2947,7 @@ static int msm72k_probe(struct platform_device *pdev) if (!ui->pool) return usb_free(ui, -ENOMEM); - INFO("msm72k_probe() io=%p, irq=%d, dma=%p(%x)\n", + USB_INFO("msm72k_probe() io=%p, irq=%d, dma=%p(%x)\n", ui->addr, irq, ui->buf, ui->dma); #ifdef CONFIG_ARCH_MSM7X30 @@ -2403,6 +2957,19 @@ static int msm72k_probe(struct platform_device *pdev) if (IS_ERR(ui->clk)) return usb_free(ui, PTR_ERR(ui->clk)); + /* If USB Core is running its protocol engine based on PCLK, + * PCLK must be running at >60Mhz for correct HSUSB operation and + * USB core cannot tolerate frequency changes on PCLK. For such + * USB cores, vote for maximum clk frequency on pclk source + */ + if (ui->pclk_src_name) { + ui->pclk_src = clk_get(0, ui->pclk_src_name); + if (IS_ERR(ui->pclk_src)) + return usb_free(ui, PTR_ERR(ui->pclk_src)); + else + clk_set_rate(ui->pclk_src, 64000000); + } + ui->pclk = clk_get(&pdev->dev, "usb_hs_pclk"); if (IS_ERR(ui->pclk)) return usb_free(ui, PTR_ERR(ui->pclk)); @@ -2428,12 +2995,12 @@ static int msm72k_probe(struct platform_device *pdev) clk_enable(ui->otgclk); writel(0, USB_USBINTR); writel(0, USB_OTGSC); - if (ui->coreclk) - clk_disable(ui->coreclk); if (ui->otgclk) clk_disable(ui->otgclk); clk_disable(ui->pclk); clk_disable(ui->clk); + if (ui->coreclk) + clk_disable(ui->coreclk); ui->in_lpm = 1; ret = request_irq(irq, usb_interrupt, 0, pdev->name, ui); @@ -2457,12 +3024,19 @@ static int msm72k_probe(struct platform_device *pdev) /* initialize mfg serial number */ - if (board_mfg_mode() == 1) + if (board_mfg_mode() == 1) { use_mfg_serialno = 1; - else + wake_lock_init(&vbus_idle_wake_lock, WAKE_LOCK_IDLE, "usb_idle_lock"); + perf_lock_init(&usb_perf_lock, PERF_LOCK_HIGHEST, "usb"); + } else use_mfg_serialno = 0; - strncpy(mfg_df_serialno, "000000000000", strlen("000000000000")); + strncpy(mfg_df_serialno, serialno, strlen(serialno)); + ui->connect_type_ready = 0; + ui->ac_detect_count = 0; + ui->ac_detect_timer.data = (unsigned long) ui; + ui->ac_detect_timer.function = ac_detect_expired; + init_timer(&ui->ac_detect_timer); return 0; } @@ -2508,7 +3082,7 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver) retval = driver->bind(&ui->gadget); if (retval) { - INFO("bind to driver %s --> error %d\n", + USB_INFO("bind to driver %s --> error %d\n", driver->driver.name, retval); device_del(&ui->gadget.dev); goto fail; @@ -2517,10 +3091,14 @@ int usb_gadget_register_driver(struct usb_gadget_driver *driver) /* create sysfs node for remote wakeup */ retval = device_create_file(&ui->gadget.dev, &dev_attr_wakeup); if (retval != 0) - INFO("failed to create sysfs entry: (wakeup) error: (%d)\n", - retval); - INFO("msm72k_udc: registered gadget driver '%s'\n", + USB_INFO("failed to create sysfs entry: (wakeup) error:" + " (%d)\n", retval); + USB_INFO("msm72k_udc: registered gadget driver '%s'\n", driver->driver.name); + +#if defined(CONFIG_USB_BYPASS_VBUS_NOTIFY) + vbus = 1; +#endif usb_start(ui); return 0; @@ -2548,7 +3126,7 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver) device_del(&dev->gadget.dev); - VDEBUG("unregistered gadget driver '%s'\n", driver->driver.name); + USB_DEBUG("unregistered gadget driver '%s'\n", driver->driver.name); return 0; } EXPORT_SYMBOL(usb_gadget_unregister_driver); diff --git a/drivers/usb/gadget/rndis.c 
b/drivers/usb/gadget/rndis.c index 48267bc0..33ac6acb 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -291,9 +291,13 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len, /* mandatory */ case OID_GEN_VENDOR_DESCRIPTION: pr_debug("%s: OID_GEN_VENDOR_DESCRIPTION\n", __func__); - length = strlen (rndis_per_dev_params [configNr].vendorDescr); - memcpy (outbuf, - rndis_per_dev_params [configNr].vendorDescr, length); + if ( rndis_per_dev_params [configNr].vendorDescr ) { + length = strlen (rndis_per_dev_params [configNr].vendorDescr); + memcpy (outbuf, + rndis_per_dev_params [configNr].vendorDescr, length); + } else { + outbuf[0] = 0; + } retval = 0; break; From 2c73e1566d9e5de502c103162c561354ef022385 Mon Sep 17 00:00:00 2001 From: tytung Date: Tue, 22 May 2012 19:51:10 +0800 Subject: [PATCH 008/155] Revert "msm: htcleo: removed the real WiFi MAC address for SD build and used new unique WiFi MAC address, so that we have the same WiFi MAC address on NAND and SD." This reverts commit 9ab858cdb2544d3db5655fbc9887e67008e13488. Revert it because new unique WiFi MAC patch only works for NAND ROM and MAGLDR SD boot, not works for WM users using Clrcad & Haret to boot SD build. --- arch/arm/mach-msm/board-htcleo-wifi-nvs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c index 562b94b1..aa0561c5 100644 --- a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c +++ b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c @@ -104,6 +104,21 @@ static void parse_tag_msm_wifi(void) pr_info("Device WiFi MAC Address: %s\n", nvs_mac_addr); } +static int parse_tag_msm_wifi_from_spl(void) +{ + uint32_t id1, id2, id3, id4, id5, id6; + uint32_t id_base = 0xFC028; //real mac offset found in spl for haret.exe on WM + id1 = readl(MSM_SPLHOOD_BASE + id_base + 0x0); + id2 = readl(MSM_SPLHOOD_BASE + id_base + 0x1); + id3 = readl(MSM_SPLHOOD_BASE + id_base + 0x2); + id4 = readl(MSM_SPLHOOD_BASE + id_base + 0x3); + id5 = readl(MSM_SPLHOOD_BASE + id_base + 0x4); + id6 = readl(MSM_SPLHOOD_BASE + id_base + 0x5); + sprintf(nvs_mac_addr, "macaddr=%2x:%2x:%2x:%2x:%2x:%2x\n", id1 & 0xff, id2 & 0xff, id3 & 0xff, id4 & 0xff, id5 & 0xff, id6 & 0xff); + pr_info("Device Real Wifi Mac Address: %s\n", nvs_mac_addr); + return 0; +} + static unsigned wifi_get_nvs_size( void ) { unsigned len; @@ -146,6 +161,8 @@ static int __init wifi_nvs_init(void) pr_info("%s\n", __func__); if (htcleo_is_nand_boot()) { parse_tag_msm_wifi(); + } else { + parse_tag_msm_wifi_from_spl(); } wifi_calibration = create_proc_entry("calibration", 0444, NULL); if (wifi_calibration != NULL) { From 5ff45267373083544e070e512e0a214d26dc341b Mon Sep 17 00:00:00 2001 From: tytung Date: Tue, 22 May 2012 19:52:16 +0800 Subject: [PATCH 009/155] htcleo: updated htcleo_defconfig to tytung_HWA_r2.4-uniMAC --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 53792bfb..275faaaa 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sun May 13 11:35:40 CST 2012 +# Tue May 22 19:52:05 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 
-CONFIG_LOCALVERSION="_tytung_HWA_r2.2-uniMAC" +CONFIG_LOCALVERSION="_tytung_HWA_r2.4-uniMAC" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From a979a22695e928e116108c276d9e5674ec1ba890 Mon Sep 17 00:00:00 2001 From: tytung Date: Fri, 1 Jun 2012 00:21:07 +0800 Subject: [PATCH 010/155] drivers: mtd: devices: htcleo_nand: find the real unique WiFi and Bluetooth MAC addresses. (Credits go to Franck78 ) http://forum.xda-developers.com/showpost.php?p=26556691&postcount=3 --- drivers/mtd/devices/htcleo_nand.c | 111 ++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index e4e347e0..4983c571 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -1835,6 +1835,116 @@ static int param_get_page_size(char *buffer, struct kernel_param *kp) } module_param_call(pagesize, NULL, param_get_page_size, NULL, S_IRUGO); +int is_htc_mac (int pattern) +{ + /* HTC blocks to find : + 00:09:2D + 00:23:76 + 18:87:76 + 1C:B0:94 + 38:E7:D8 + 64:A7:69 + 7C:61:93 + 90:21:55 + A0:F4:50 + A8:26:D9 + D4:20:6D + D8:B3:77 + E8:99:C4 + F8:DB:F7 */ + static int nums[] = { + 0x00092D,0x2D0900, + 0x002376,0x762300, + 0x188776,0x768718, + 0x1CB094,0x94B01C, + 0x38E7D8,0xD8E738, + 0x64A769,0x69A764, + 0x7C6193,0x93617C, + 0x902155,0x552190, + 0xA0F450,0x50F4A0, + 0xA826D9,0xD926A8, + 0xD4206D,0x6D20D4, + 0xD8B377,0x77B3D8, + 0xE899C4,0xC499E8, + 0xF8DBF7,0xF7DBF8}; + int i; + for (i=0; i< (sizeof(nums)/sizeof(nums[0])); i++) { + if (nums[i] == pattern) return 1; + } + return 0; +} + +void scanmac(struct mtd_info *mtd) +{ + unsigned char *iobuf; + int ret; + loff_t addr; + struct mtd_oob_ops ops; + int i,j,k; + + iobuf = kmalloc(2048/*mtd->erasesize*/, GFP_KERNEL); + if (!iobuf) { + /*ret = -ENOMEM;*/ + printk("%s: error: cannot allocate memory\n",__func__); + return; + } + + ops.mode = MTD_OOB_PLACE; + ops.len = 2048; + ops.datbuf = iobuf; + ops.ooblen = 0; + ops.oobbuf = NULL; + ops.retlen = 0; + + /* block 505 page 6 contains as good candidate */ + addr = ((loff_t) 505*0x20000 + 6*2048); + ret = msm_nand_read_oob(mtd, addr, &ops); + + if (ret == -EUCLEAN) + ret = 0; + if (ret || ops.retlen != 2048 ) { + printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); + goto out; + } + + printk("%s: Prefered candidate mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, + iobuf[5],iobuf[4],iobuf[3],iobuf[2],iobuf[1],iobuf[0]); + + /* now lets walk looking for HTC mac in the first reserved blocks of NAND */ + /* NUM_PROTECTED_BLOCKS=0x212 but Parttiontable starts at 0x219 */ + /* I think 400 is ok, I have already eliminated 0 - 157 with false positive */ + /* If my guess is correct, only 505 will match ;-) */ + for (i=158; i<0x219; i++) { + for (j=0; j<64; j++) { + addr = ((loff_t) i*0x20000 + j*2048); + ret = msm_nand_read_oob(mtd, addr, &ops); + + if (ret == -EUCLEAN) + ret = 0; + if (ret || ops.retlen != 2048 ) { + printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); + break; + } + /* check */ + for (k=0; k<2045; k++) { + if (is_htc_mac( (iobuf[k+0]<<16) + (iobuf[k+1]<<8) + iobuf[k+2])) { + printk("Mac candidate at block:%d page:%d offset:%d:\n",i,j,k); + k >>= 4; + k <<= 4; + print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, &iobuf[k], 16); + k += 16; + } + } + }/*j*/ + }/*i*/ + ret = 0; +out: + kfree(iobuf); + if (ret) + printk("Find MAc Error %d occurred\n", ret); + return; +} + /** * msm_nand_scan - [msm_nand Interface] Scan for 
the msm_nand device * @param mtd MTD device structure @@ -2000,6 +2110,7 @@ int msm_nand_scan(struct mtd_info *mtd, int maxchips) /* msm_nand_unlock_all(mtd); */ /* return this->scan_bbt(mtd); */ + scanmac(mtd); #if VERBOSE for (i=0;iblock_count;i++) From 8505d014100aef2868cd9413b42ed96a906e4b07 Mon Sep 17 00:00:00 2001 From: tytung Date: Fri, 1 Jun 2012 00:33:19 +0800 Subject: [PATCH 011/155] drivers: mtd: devices: htcleo_nand: leave only correct candidates for WiFi and Bluetooth MAC addresses. (Credits go to Rick_1995, aka zeusk.) http://forum.xda-developers.com/showpost.php?p=26579738&postcount=4 --- drivers/mtd/devices/htcleo_nand.c | 88 +++++-------------------------- 1 file changed, 14 insertions(+), 74 deletions(-) diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 4983c571..7a1a874b 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -1835,56 +1835,15 @@ static int param_get_page_size(char *buffer, struct kernel_param *kp) } module_param_call(pagesize, NULL, param_get_page_size, NULL, S_IRUGO); -int is_htc_mac (int pattern) -{ - /* HTC blocks to find : - 00:09:2D - 00:23:76 - 18:87:76 - 1C:B0:94 - 38:E7:D8 - 64:A7:69 - 7C:61:93 - 90:21:55 - A0:F4:50 - A8:26:D9 - D4:20:6D - D8:B3:77 - E8:99:C4 - F8:DB:F7 */ - static int nums[] = { - 0x00092D,0x2D0900, - 0x002376,0x762300, - 0x188776,0x768718, - 0x1CB094,0x94B01C, - 0x38E7D8,0xD8E738, - 0x64A769,0x69A764, - 0x7C6193,0x93617C, - 0x902155,0x552190, - 0xA0F450,0x50F4A0, - 0xA826D9,0xD926A8, - 0xD4206D,0x6D20D4, - 0xD8B377,0x77B3D8, - 0xE899C4,0xC499E8, - 0xF8DBF7,0xF7DBF8}; - int i; - for (i=0; i< (sizeof(nums)/sizeof(nums[0])); i++) { - if (nums[i] == pattern) return 1; - } - return 0; -} - void scanmac(struct mtd_info *mtd) { unsigned char *iobuf; int ret; loff_t addr; struct mtd_oob_ops ops; - int i,j,k; iobuf = kmalloc(2048/*mtd->erasesize*/, GFP_KERNEL); if (!iobuf) { - /*ret = -ENOMEM;*/ printk("%s: error: cannot allocate memory\n",__func__); return; } @@ -1896,52 +1855,33 @@ void scanmac(struct mtd_info *mtd) ops.oobbuf = NULL; ops.retlen = 0; - /* block 505 page 6 contains as good candidate */ - addr = ((loff_t) 505*0x20000 + 6*2048); + addr = ((loff_t) 505*0x20000); ret = msm_nand_read_oob(mtd, addr, &ops); - if (ret == -EUCLEAN) ret = 0; if (ret || ops.retlen != 2048 ) { printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); goto out; } + printk("%s: candidate for wifi mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, + iobuf[40],iobuf[41],iobuf[42],iobuf[43],iobuf[44],iobuf[45]); - printk("%s: Prefered candidate mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, + addr = ((loff_t) 505*0x20000 + 6*0x800); + ret = msm_nand_read_oob(mtd, addr, &ops); + if (ret == -EUCLEAN) + ret = 0; + if (ret || ops.retlen != 2048 ) { + printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); + goto out; + } + printk("%s: candidate for bluetooth mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, iobuf[5],iobuf[4],iobuf[3],iobuf[2],iobuf[1],iobuf[0]); - /* now lets walk looking for HTC mac in the first reserved blocks of NAND */ - /* NUM_PROTECTED_BLOCKS=0x212 but Parttiontable starts at 0x219 */ - /* I think 400 is ok, I have already eliminated 0 - 157 with false positive */ - /* If my guess is correct, only 505 will match ;-) */ - for (i=158; i<0x219; i++) { - for (j=0; j<64; j++) { - addr = ((loff_t) i*0x20000 + j*2048); - ret = msm_nand_read_oob(mtd, addr, &ops); - - if (ret == -EUCLEAN) - ret = 0; - if (ret || ops.retlen != 2048 ) { - 
printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); - break; - } - /* check */ - for (k=0; k<2045; k++) { - if (is_htc_mac( (iobuf[k+0]<<16) + (iobuf[k+1]<<8) + iobuf[k+2])) { - printk("Mac candidate at block:%d page:%d offset:%d:\n",i,j,k); - k >>= 4; - k <<= 4; - print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, &iobuf[k], 16); - k += 16; - } - } - }/*j*/ - }/*i*/ ret = 0; + out: kfree(iobuf); - if (ret) - printk("Find MAc Error %d occurred\n", ret); + if (ret) printk("Find MAC Error %d occurred\n", ret); return; } From db81187e55940ec50973e08db909f77a4fde5475 Mon Sep 17 00:00:00 2001 From: tytung Date: Fri, 1 Jun 2012 01:16:20 +0800 Subject: [PATCH 012/155] msm: htcleo: export the real WiFi and Bluetooth MAC addresses. (Credits go to marc1706) --- arch/arm/mach-msm/board-htcleo-wifi-nvs.c | 49 +++---------------- arch/arm/mach-msm/board-htcleo.c | 34 +++---------- .../mach-msm/include/mach/board-htcleo-mac.h | 27 ++++++++++ drivers/mtd/devices/htcleo_nand.c | 10 ++-- 4 files changed, 46 insertions(+), 74 deletions(-) create mode 100644 arch/arm/mach-msm/include/mach/board-htcleo-mac.h diff --git a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c index aa0561c5..7eab5448 100644 --- a/arch/arm/mach-msm/board-htcleo-wifi-nvs.c +++ b/arch/arm/mach-msm/board-htcleo-wifi-nvs.c @@ -28,13 +28,17 @@ #include #include "board-htcleo.h" +#include #define NVS_MAX_SIZE 0x800U -#define NVS_MACADDR_SIZE 0x1AU #define WLAN_SKB_BUF_NUM 16 +/* + * wifi mac address will be parsed in msm_nand_probe + * see drivers/mtd/devices/htcleo_nand.c + */ static struct proc_dir_entry *wifi_calibration; -static unsigned char nvs_mac_addr[NVS_MACADDR_SIZE]; +char nvs_mac_addr[NVS_MACADDR_SIZE]; static unsigned char *hardcoded_nvs = "sromrev=3\n"\ "vendid=0x14e4\n"\ @@ -81,43 +85,7 @@ unsigned char *get_wifi_nvs_ram( void ) } EXPORT_SYMBOL(get_wifi_nvs_ram); -static void parse_tag_msm_wifi(void) -{ - uint32_t id1, id2, sid1, sid2, sid3; - uint32_t id_base = 0xef260; - /* read Serial Number SN (IMEI = TAC.SN) */ - id1 = readl(MSM_SHARED_RAM_BASE + id_base + 0x8); - id2 = readl(MSM_SHARED_RAM_BASE + id_base + 0xc); - /* Xor SN with TAC (yes only two differents TAC for the HD2 */ - id1 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x0); - id2 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x4); - /* Xor with CID of operator too further mix the Serial */ - id1 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x10); - id2 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x14); - /* repack the SN part from IMEI (id) into three bytes using low nibbles */ - sid1 = ((id1 << 4) & 0xf0) | ((id1 >> 8) & 0xf); - sid2 = ((id1 >> 12) & 0xf0) | ((id1 >> 24) & 0xf); - sid3 = ((id2 << 4) & 0xf0) | ((id2 >> 8) & 0xf); - - sprintf(nvs_mac_addr, "macaddr=00:23:76:%02x:%02x:%02x\n", sid1, sid2, sid3); - pr_info("Device WiFi MAC Address: %s\n", nvs_mac_addr); -} - -static int parse_tag_msm_wifi_from_spl(void) -{ - uint32_t id1, id2, id3, id4, id5, id6; - uint32_t id_base = 0xFC028; //real mac offset found in spl for haret.exe on WM - id1 = readl(MSM_SPLHOOD_BASE + id_base + 0x0); - id2 = readl(MSM_SPLHOOD_BASE + id_base + 0x1); - id3 = readl(MSM_SPLHOOD_BASE + id_base + 0x2); - id4 = readl(MSM_SPLHOOD_BASE + id_base + 0x3); - id5 = readl(MSM_SPLHOOD_BASE + id_base + 0x4); - id6 = readl(MSM_SPLHOOD_BASE + id_base + 0x5); - sprintf(nvs_mac_addr, "macaddr=%2x:%2x:%2x:%2x:%2x:%2x\n", id1 & 0xff, id2 & 0xff, id3 & 0xff, id4 & 0xff, id5 & 0xff, id6 & 0xff); - pr_info("Device Real Wifi Mac Address: %s\n", nvs_mac_addr); - 
return 0; -} static unsigned wifi_get_nvs_size( void ) { @@ -159,11 +127,6 @@ static int wifi_calibration_read_proc(char *page, char **start, off_t off, static int __init wifi_nvs_init(void) { pr_info("%s\n", __func__); - if (htcleo_is_nand_boot()) { - parse_tag_msm_wifi(); - } else { - parse_tag_msm_wifi_from_spl(); - } wifi_calibration = create_proc_entry("calibration", 0444, NULL); if (wifi_calibration != NULL) { wifi_calibration->size = wifi_get_nvs_size(); diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c index 0b21f220..948af5cd 100644 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -64,6 +64,7 @@ #include #endif +#include #include #include "board-htcleo.h" @@ -535,35 +536,14 @@ static struct platform_device msm_camera_sensor_s5k3e2fx = /////////////////////////////////////////////////////////////////////// /* AOSP style interface */ -#define BDADDR_STR_SIZE 18 -static char bdaddr[BDADDR_STR_SIZE]; - +/* + * bluetooth mac address will be parsed in msm_nand_probe + * see drivers/mtd/devices/htcleo_nand.c + */ +char bdaddr[BDADDR_STR_SIZE]; module_param_string(bdaddr, bdaddr, sizeof(bdaddr), 0400); MODULE_PARM_DESC(bdaddr, "bluetooth address"); -static void parse_tag_bdaddr(void) -{ - uint32_t id1, id2, sid1, sid2, sid3; - uint32_t id_base = 0xef260; - /* read Serial Number SN (IMEI = TAC.SN) */ - id1 = readl(MSM_SHARED_RAM_BASE + id_base + 0x8); - id2 = readl(MSM_SHARED_RAM_BASE + id_base + 0xc); - /* Xor SN with TAC (yes only two differents TAC for the HD2 */ - id1 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x0); - id2 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x4); - /* Xor with CID of operator too further mix the Serial */ - id1 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x10); - id2 ^= readl(MSM_SHARED_RAM_BASE + id_base + 0x14); - - /* repack the SN part from IMEI (id) into three bytes using low nibbles */ - sid1 = ((id1 << 4) & 0xf0) | ((id1 >> 8) & 0xf); - sid2 = ((id1 >> 12) & 0xf0) | ((id1 >> 24) & 0xf); - sid3 = ((id2 << 4) & 0xf0) | ((id2 >> 8) & 0xf); - - sprintf(bdaddr, "00:23:76:%02x:%02x:%02x", sid3, sid2, sid1); - pr_info("Device Bluetooth MAC Address: %s\n", bdaddr); -} -/* end AOSP style interface */ #ifdef CONFIG_SERIAL_MSM_HS static struct msm_serial_hs_platform_data msm_uart_dm1_pdata = { @@ -1081,8 +1061,6 @@ static void __init htcleo_init(void) config_gpio_table(bt_gpio_table, ARRAY_SIZE(bt_gpio_table)); - parse_tag_bdaddr(); - htcleo_audio_init(); msm_device_i2c_init(); diff --git a/arch/arm/mach-msm/include/mach/board-htcleo-mac.h b/arch/arm/mach-msm/include/mach/board-htcleo-mac.h new file mode 100644 index 00000000..f20ea6fe --- /dev/null +++ b/arch/arm/mach-msm/include/mach/board-htcleo-mac.h @@ -0,0 +1,27 @@ +/* arch/arm/mach-msm/include/mach/board-htcleo-mac.h + * + * Copyright (C) 2012 Marc Alexander. + * Author: Marc Alexander + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+*/ + +#ifndef __ARCH_ARM_MACH_MSM_BOARD_HTCLEO_MAC_H +#define __ARCH_ARM_MACH_MSM_BOARD_HTCLEO_MAC_H + +#define NVS_MACADDR_SIZE 0x1AU + +extern char nvs_mac_addr[NVS_MACADDR_SIZE]; + +#define BDADDR_STR_SIZE 18 + +extern char bdaddr[BDADDR_STR_SIZE]; /* AOSP style */ + +#endif diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 7a1a874b..9b23d680 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -39,6 +39,7 @@ #include #include +#include unsigned crci_mask; @@ -1863,8 +1864,9 @@ void scanmac(struct mtd_info *mtd) printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); goto out; } - printk("%s: candidate for wifi mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, + sprintf(nvs_mac_addr, "macaddr=%02x:%02x:%02x:%02x:%02x:%02x\n", iobuf[40],iobuf[41],iobuf[42],iobuf[43],iobuf[44],iobuf[45]); + pr_info("Device WiFi MAC Address: %s\n", nvs_mac_addr); addr = ((loff_t) 505*0x20000 + 6*0x800); ret = msm_nand_read_oob(mtd, addr, &ops); @@ -1874,9 +1876,11 @@ void scanmac(struct mtd_info *mtd) printk("%s: error: read(%d) failed at %#llx\n",__func__,ops.retlen, addr); goto out; } - printk("%s: candidate for bluetooth mac=%02x:%02x:%02x:%02x:%02x:%02x\n",__func__, - iobuf[5],iobuf[4],iobuf[3],iobuf[2],iobuf[1],iobuf[0]); + // BT MAC for AOSP ROMs + sprintf(bdaddr, "%02x:%02x:%02x:%02x:%02x:%02x", + iobuf[5],iobuf[4],iobuf[3],iobuf[2],iobuf[1],iobuf[0]); + pr_info("Device Bluetooth MAC Address: %s\n", bdaddr); ret = 0; out: From e512b34cde0d0908acb46920e365007ef1e13e6a Mon Sep 17 00:00:00 2001 From: tytung Date: Fri, 1 Jun 2012 01:16:56 +0800 Subject: [PATCH 013/155] htcleo: updated htcleo_defconfig to tytung_HWA_r2.5 --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 275faaaa..d330e5ef 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Tue May 22 19:52:05 CST 2012 +# Fri Jun 1 01:10:03 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r2.4-uniMAC" +CONFIG_LOCALVERSION="_tytung_HWA_r2.5" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From c3fcc464568d33224fafea57492aecccd023e60a Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 2 Jun 2012 00:45:50 +0800 Subject: [PATCH 014/155] updated README. 
--- README | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README b/README index e10c6c2d..35d18fb0 100644 --- a/README +++ b/README @@ -20,17 +20,16 @@ Primary features: - Two-way call recording (Credits: avs333, snq-, and tytung) - T-Mobile Wi-Fi Calling (Credits: tytung) - Wi-Fi IEEE 802.1x/EAP authentication (Credits: tytung) -- Native USB Tethering (for Gingerbread) (Credits: tytung) +- Native USB Tethering (Credits: tytung) - Native Wi-Fi Tethering (Credits: tytung) -- Real Wi-Fi MAC address (only for SD build on WinMo 6.5) (Credits: savan and tytung) -- Unique Wi-Fi MAC address (for MAGLDR and cLK) (Credits: Franck78 and markinus) -- Unique Bluetooth MAC address (Credits: Franck78, markinus and tytung) - Official HTC extended battery support (HTC EB 2300mAh) (Credits: arne) - ALSA sound driver as kernel modules (alsa-pcm-htc-leo.ko and alsa-mix-htc-leo.ko) (Credits: cotulla) - Wired headphones support for ICS. (Credits: zivan56) - Backported xt_qtaguid and xt_quota2 to support data usage for ICS. (Credits: tytung) - Improved Flashlight compatibility for ICS. (Credits: tytung) - Backported the GPU driver to enable the Hardware Acceleration for ICS. (Credits: Securecrt and Rick_1995) +- Updated to msm-kgsl3d0 v3.8 to match the latest QCOM Adreno200 drivers for ICS. (Credits: Rick_1995) +- Real WiFi and Bluetooth MAC addresses. (Credits: Franck78, Rick_1995 and Marc1706) Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, Franck78, other devs, and testers. From 0cc4b7c1ca775099a0bccd6f3bd19667f4d0348d Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 2 Jun 2012 00:52:31 +0800 Subject: [PATCH 015/155] net: netfilter: enable bandwidth control to be able to set mobile data limit. 
--- arch/arm/configs/htcleo_defconfig | 17 +++++++++++++++-- net/netfilter/xt_TPROXY.c | 2 ++ net/netfilter/xt_socket.c | 2 ++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index d330e5ef..a5d10380 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -709,8 +709,21 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set -# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4fa12857..10640fdd 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,6 +22,7 @@ #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_TPROXY_HAVE_IPV6 1 #include @@ -29,6 +30,7 @@ #include #include #endif +*/ #include #include diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9b38fd15..0f10dfc6 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,11 +22,13 @@ #include #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_SOCKET_HAVE_IPV6 1 #include #include #endif +*/ #include From 8967e1f301956d78ea76b92337fce7cfefdbdac3 Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 2 Jun 2012 00:54:56 +0800 Subject: [PATCH 016/155] htcleo: updated htcleo_defconfig to tytung_HWA_r3 --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index a5d10380..ce9ee98a 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Fri Jun 1 01:10:03 CST 2012 +# Sat Jun 2 00:52:50 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r2.5" +CONFIG_LOCALVERSION="_tytung_HWA_r3" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From b38b808efbcffad56e03f5d03d8d6157ff6e2666 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 2 Jun 2012 15:33:41 +0800 Subject: [PATCH 017/155] enable fast charge --- drivers/power/ds2746_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 drivers/power/ds2746_battery.c diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c old mode 100644 new mode 100755 index 931191b4..71f11425 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -280,7 +280,7 @@ static BOOL is_charging_avaiable(void) static BOOL is_high_current_charging_avaialable(void) { if 
(!poweralg.protect_flags.is_charging_high_current_avaialble) return FALSE; - if (!poweralg.is_china_ac_in) return FALSE; + //if (!poweralg.is_china_ac_in) return FALSE; if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) return FALSE; return TRUE; } From a7c3c3727b9b8b921acbca6083ddeb776d2d58ae Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 2 Jun 2012 15:35:33 +0800 Subject: [PATCH 018/155] enable the 2700mAh battery for my private use only --- include/linux/ds2746_param_config.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ds2746_param_config.h b/include/linux/ds2746_param_config.h index 728f0d4d..f6d54bb0 100644 --- a/include/linux/ds2746_param_config.h +++ b/include/linux/ds2746_param_config.h @@ -107,7 +107,7 @@ UINT32 FL_25[] = 1280, /* Sony 1300mAh (HTE) */ 1250, /* Sanyo 1300mAh (HTE) */ 1230, /* Samsung 1230mAh */ - 2300, /* HTC Extended 2300mAh */ + 2700, /* HTC Extended 2300mAh */ }; UINT32 PD_M_COEF[] = @@ -118,7 +118,7 @@ UINT32 PD_M_COEF[] = 24, /* Sony 1300mAh (HTE) */ 27, /* Sanyo 1300mAh (HTE) */ 30, /* Samsung 1230mAh */ - 30, /* HTC Extended 2300mAh */ + 30, /* HTC Extended 2700mAh */ }; UINT32 PD_M_RESL[] = @@ -140,7 +140,7 @@ UINT32 PD_T_COEF[] = 140, /* Sony 1300mAh (HTE) */ 156, /* Sanyo 1300mAh (HTE) */ 250, /* Samsung 1230mAh */ - 250, /* HTC Extended 2300mAh */ + 250, /* HTC Extended 2700mAh */ }; /*! star_lee 20100426 - update KADC discharge parameter */ @@ -153,13 +153,13 @@ UINT32 M_PARAMETER_SONY_1300MAH_FORMOSA[] = UINT32 M_PARAMETER_Samsung_1230MAH_FORMOSA[] = { /* capacity (in 0.01%) -> voltage (in mV)*/ - 10000, 4135, 7500, 3960, 4700, 3800, 1700, 3727, 900, 3674, 300, 3640, 0, 3420, + 10000, 4150, 7500, 3960, 4700, 3800, 1700, 3727, 900, 3600, 300, 3400, 0, 3200, }; UINT32 M_PARAMETER_HTC_2300MAH_FORMOSA[] = { /* capacity (in 0.01%) -> voltage (in mV)*/ - 10000, 4135, 7500, 3960, 4700, 3800, 1700, 3727, 900, 3674, 300, 3640, 0, 3420, + 10000, 4150, 7500, 3950, 4700, 3780, 1700, 3700, 900, 3600, 300, 3400, 0, 3200, }; @@ -290,8 +290,8 @@ static BOOL is_allow_batt_id_change = FALSE; /*boot up voltage*/ /*dead battery is voltage < M_0*/ -#define BATTERY_DEAD_VOLTAGE_LEVEL 3420 -#define BATTERY_DEAD_VOLTAGE_RELEASE 3450 +#define BATTERY_DEAD_VOLTAGE_LEVEL 3200 +#define BATTERY_DEAD_VOLTAGE_RELEASE 3250 #define TEMP_MAX 70 #define TEMP_MIN -11 From aacc150509762ee2226fc0050226a6d1b481c623 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 2 Jun 2012 16:26:49 +0800 Subject: [PATCH 019/155] add auto build script --- build.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 build.sh diff --git a/build.sh b/build.sh new file mode 100755 index 00000000..f2a73c40 --- /dev/null +++ b/build.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +KERNELBASEDIR=/ics/kernel/out + +make htcleo_defconfig +make ARCH=arm CROSS_COMPILE=/home/securecrt/tools/arm-2010q1/bin/arm-none-eabi- zImage -j8 && make ARCH=arm CROSS_COMPILE=/home/securecrt/tools/arm-2010q1/bin/arm-none-eabi- modules -j8 + +if [ -f arch/arm/boot/zImage ]; then + +mkdir -p $KERNELBASEDIR/ +rm -rf $KERNELBASEDIR/boot/* +rm -rf $KERNELBASEDIR/system/* +mkdir -p $KERNELBASEDIR/boot +mkdir -p $KERNELBASEDIR/system/ +mkdir -p $KERNELBASEDIR/system/lib/ +mkdir -p $KERNELBASEDIR/system/lib/modules + +cp arch/arm/boot/zImage $KERNELBASEDIR/boot/zImage + +make ARCH=arm CROSS_COMPILE=/home/securecrt/tools/arm-2010q1/bin/arm-none-eabi- INSTALL_MOD_PATH=$KERNELBASEDIR/system/lib/modules modules_install -j8 + +cd $KERNELBASEDIR/system/lib/modules 
+find -iname *.ko | xargs -i -t cp {} . +rm -rf $KERNELBASEDIR/system/lib/modules/lib +stat $KERNELBASEDIR/boot/zImage +cd ../../../ +zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip . +else +echo "Kernel STUCK in BUILD! no zImage exist" +fi + From 32f796ad5c6babe57dfaef3037436acab1c8606b Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 2 Jun 2012 16:34:51 +0800 Subject: [PATCH 020/155] compress boot and system dir only --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index f2a73c40..67ca704e 100755 --- a/build.sh +++ b/build.sh @@ -24,7 +24,7 @@ find -iname *.ko | xargs -i -t cp {} . rm -rf $KERNELBASEDIR/system/lib/modules/lib stat $KERNELBASEDIR/boot/zImage cd ../../../ -zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip . +zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip boot system else echo "Kernel STUCK in BUILD! no zImage exist" fi From d0bde07fa4f624b10f7d0dc255427a4ffb657184 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 5 Jun 2012 00:12:26 +0800 Subject: [PATCH 021/155] set ALLORNOTHING allocator for mdp heap --- arch/arm/mach-msm/board-htcleo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 arch/arm/mach-msm/board-htcleo.c diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c old mode 100644 new mode 100755 index 948af5cd..4697ef53 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -755,7 +755,7 @@ static struct android_pmem_platform_data mdp_pmem_pdata = { .start = MSM_PMEM_MDP_BASE, .size = MSM_PMEM_MDP_SIZE, #ifdef CONFIG_MSM_KGSL - .allocator_type = PMEM_ALLOCATORTYPE_BITMAP, + .allocator_type = PMEM_ALLOCATORTYPE_ALLORNOTHING, #else .no_allocator = 0, #endif From 4f50d6395132bb33d69ae136a7c9f8a8b48008db Mon Sep 17 00:00:00 2001 From: securecrt Date: Mon, 18 Jun 2012 20:28:17 +0800 Subject: [PATCH 022/155] msm: kgsl: fix format of the rbbm read error message msm: kgsl: Assign a valid context only after one has been restored --- drivers/gpu/msm/a2xx_reg.h | 22 ++++------------------ drivers/gpu/msm/adreno_a2xx.c | 26 +++++++++++++++++++------- drivers/gpu/msm/adreno_drawctxt.c | 7 ++++--- 3 files changed, 27 insertions(+), 28 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/a2xx_reg.h mode change 100644 => 100755 drivers/gpu/msm/adreno_a2xx.c mode change 100644 => 100755 drivers/gpu/msm/adreno_drawctxt.c diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h old mode 100644 new mode 100755 index d859d61c..1c7be3e6 --- a/drivers/gpu/msm/a2xx_reg.h +++ b/drivers/gpu/msm/a2xx_reg.h @@ -140,24 +140,9 @@ union reg_rb_edram_info { struct rb_edram_info_t f; }; -#define RBBM_READ_ERROR_UNUSED0_SIZE 2 -#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 -#define RBBM_READ_ERROR_UNUSED1_SIZE 13 -#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 -#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 - -struct rbbm_read_error_t { - unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; - unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; - unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; - unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; - unsigned int read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; -}; - -union rbbm_read_error_u { - unsigned int val:32; - struct rbbm_read_error_t f; -}; +#define RBBM_READ_ERROR_ADDRESS_MASK 0x0001fffc +#define RBBM_READ_ERROR_REQUESTER (1<<30) +#define RBBM_READ_ERROR_ERROR (1<<31) #define CP_RB_CNTL_RB_BUFSZ_SIZE 6 #define CP_RB_CNTL_UNUSED0_SIZE 2 @@ -278,6 +263,7 @@ union 
reg_cp_rb_cntl { #define REG_CP_ME_CNTL 0x01F6 #define REG_CP_ME_RAM_DATA 0x01FA #define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_RAM_RADDR 0x01F9 #define REG_CP_ME_STATUS 0x01F7 #define REG_CP_PFP_UCODE_ADDR 0x00C0 #define REG_CP_PFP_UCODE_DATA 0x00C1 diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c old mode 100644 new mode 100755 index 064b05e9..f204a3bd --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1532,21 +1532,33 @@ static void a2xx_rbbm_intrcallback(struct kgsl_device *device) { unsigned int status = 0; unsigned int rderr = 0; + unsigned int addr = 0; + const char *source; adreno_regread(device, REG_RBBM_INT_STATUS, &status); if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { - union rbbm_read_error_u rerr; adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); - rerr.val = rderr; - if (rerr.f.read_address == REG_CP_INT_STATUS && - rerr.f.read_error && - rerr.f.read_requester) + source = (rderr & RBBM_READ_ERROR_REQUESTER) + ? "host" : "cp"; + /* convert to dword address */ + addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2; + + /* + * Log CP_INT_STATUS interrupts from the CP at a + * lower level because they can happen frequently + * and are worked around in a2xx_irq_handler. + */ + if (addr == REG_CP_INT_STATUS && + rderr & RBBM_READ_ERROR_ERROR && + rderr & RBBM_READ_ERROR_REQUESTER) KGSL_DRV_WARN(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); else KGSL_DRV_CRIT(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); } status &= RBBM_INT_MASK; diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c old mode 100644 new mode 100755 index b7b0ea46..b192b862 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -179,11 +179,12 @@ void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_context *drawctxt = context->devctxt; + struct adreno_context *drawctxt; - if (drawctxt == NULL) + if (context == NULL) return; + drawctxt = context->devctxt; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { /* no need to save GMEM or shader, the context is @@ -261,6 +262,6 @@ void adreno_drawctxt_switch(struct adreno_device *adreno_dev, adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); /* Set the new context */ - adreno_dev->drawctxt_active = drawctxt; adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + adreno_dev->drawctxt_active = drawctxt; } From 1bd0e44d7aa1f23c60ed0837fb29ebe4fe32bc63 Mon Sep 17 00:00:00 2001 From: securecrt Date: Mon, 18 Jun 2012 20:31:47 +0800 Subject: [PATCH 023/155] reduced the pmem size to save memory for userspace, TEST ONLY!! 
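For reference, the pmem layout this change produces (values taken from the hunks below; the arithmetic is only a sanity check of the new bases and sizes):

    MDP  pmem: base 0x3B700000, size 0x01000000 (16 MiB, was 32 MiB)
    ADSP pmem: base 0x3C700000 = 0x3B700000 + 0x01000000, size 0x01000000 (16 MiB, was 0x02900000 = 41 MiB)
    SF   pmem: size 0x01000000 (16 MiB, was 32 MiB)

The ADSP base moves down by 16 MiB so it stays contiguous with the smaller MDP region, and roughly 57 MiB of previously reserved pmem is returned for general use.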
--- arch/arm/mach-msm/board-htcleo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) mode change 100644 => 100755 arch/arm/mach-msm/board-htcleo.h diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h old mode 100644 new mode 100755 index daf3db03..6e4a70be --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -39,10 +39,10 @@ #define MSM_FB_SIZE 0x00600000 #define MSM_PMEM_MDP_BASE 0x3B700000 -#define MSM_PMEM_MDP_SIZE 0x02000000 +#define MSM_PMEM_MDP_SIZE 0x01000000 -#define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_BASE 0x3C700000 +#define MSM_PMEM_ADSP_SIZE 0x01000000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 @@ -58,7 +58,7 @@ /* Begin EBI region */ #define PMEM_KERNEL_EBI1_SIZE 0x00028000 -#define MSM_PMEM_SF_SIZE 0x02000000 +#define MSM_PMEM_SF_SIZE 0x01000000 /* MSM_RAM_CONSOLE uses the last 0x00040000 of EBI memory, defined in msm_iomap.h #define MSM_RAM_CONSOLE_SIZE 0x00040000 From 758812c3aa97089d74ff75075d68afb7f17c803e Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 18 Jun 2012 23:52:45 +0800 Subject: [PATCH 024/155] fixed the adsp pmem is too low for camera --- arch/arm/mach-msm/board-htcleo.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 6e4a70be..4da03b8d 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -26,6 +26,7 @@ #define MSM_EBI1_BANK0_SIZE 0x1E7C0000 /* 488MB - 0x00040000 RAM CONSOLE*/ #endif + /* Don't change that */ #define MSM_SMI_BASE 0x00000000 #define MSM_SMI_SIZE 0x04000000 @@ -42,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x01000000 #define MSM_PMEM_ADSP_BASE 0x3C700000 -#define MSM_PMEM_ADSP_SIZE 0x01000000 +#define MSM_PMEM_ADSP_SIZE 0x01800000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 From 83cf3269bca2913679c9bc8333ff11904225130f Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 19 Jun 2012 01:37:29 +0800 Subject: [PATCH 025/155] add more sf_pmem to prevent memory full --- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 4da03b8d..b00b2cc7 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -59,7 +59,7 @@ /* Begin EBI region */ #define PMEM_KERNEL_EBI1_SIZE 0x00028000 -#define MSM_PMEM_SF_SIZE 0x01000000 +#define MSM_PMEM_SF_SIZE 0x01400000 /* MSM_RAM_CONSOLE uses the last 0x00040000 of EBI memory, defined in msm_iomap.h #define MSM_RAM_CONSOLE_SIZE 0x00040000 From cad19fbe99a8efb74b9497abb214d5dfcbc07d89 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 19 Jun 2012 01:38:16 +0800 Subject: [PATCH 026/155] change the build batch file --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 67ca704e..6886e590 100755 --- a/build.sh +++ b/build.sh @@ -24,7 +24,7 @@ find -iname *.ko | xargs -i -t cp {} . rm -rf $KERNELBASEDIR/system/lib/modules/lib stat $KERNELBASEDIR/boot/zImage cd ../../../ -zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip boot system +zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip boot system META-INF else echo "Kernel STUCK in BUILD! 
no zImage exist" fi From 2f3f4d14f9f5df2c51e04640ce1cdc10ac0de7fc Mon Sep 17 00:00:00 2001 From: securecrt Date: Tue, 19 Jun 2012 14:00:07 +0800 Subject: [PATCH 027/155] msm: kgsl: Add support for the preamble context flag Userspace will set a flag in the context if preambles are in use. If they are, we can safely skip save and restore commands for the context. GMEM save/restore is still required. To improve performance, preamble commands are skipped when the context hasn't changed since the last issueibcmds. from Code Aurora --- drivers/gpu/msm/adreno.h | 6 +- drivers/gpu/msm/adreno_a2xx.c | 138 +++++++++++++++++++--------- drivers/gpu/msm/adreno_drawctxt.c | 20 ++-- drivers/gpu/msm/adreno_drawctxt.h | 6 +- drivers/gpu/msm/adreno_ringbuffer.c | 31 ++++++- include/linux/msm_kgsl.h | 1 + 6 files changed, 133 insertions(+), 69 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/adreno.h mode change 100644 => 100755 drivers/gpu/msm/adreno_drawctxt.h mode change 100644 => 100755 drivers/gpu/msm/adreno_ringbuffer.c mode change 100644 => 100755 include/linux/msm_kgsl.h diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h old mode 100644 new mode 100755 index 51ee31a5..e0857e05 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -24,6 +24,7 @@ KGSL_CONTAINER_OF(device, struct adreno_device, dev) /* Flags to control command packet settings */ +#define KGSL_CMD_FLAGS_NONE 0x00000000 #define KGSL_CMD_FLAGS_PMODE 0x00000001 #define KGSL_CMD_FLAGS_NO_TS_CMP 0x00000002 #define KGSL_CMD_FLAGS_NOT_KERNEL_CMD 0x00000004 @@ -67,10 +68,7 @@ struct adreno_device { }; struct adreno_gpudev { - int (*ctxt_gpustate_shadow)(struct adreno_device *, - struct adreno_context *); - int (*ctxt_gmem_shadow)(struct adreno_device *, - struct adreno_context *); + int (*ctxt_create)(struct adreno_device *, struct adreno_context *); void (*ctxt_save)(struct adreno_device *, struct adreno_context *); void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); irqreturn_t (*irq_handler)(struct adreno_device *); diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index f204a3bd..c0a0a59a 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -546,6 +546,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, unsigned int addr = shadow->gmemshadow.gpuaddr; unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -554,6 +555,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -755,6 +757,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, unsigned int *cmds = shadow->gmem_restore_commands; unsigned int *start = cmds; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -762,6 +765,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -1089,7 +1093,8 @@ static void build_regrestore_cmds(struct adreno_device *adreno_dev, } static void -build_shader_save_restore_cmds(struct 
adreno_context *drawctxt) +build_shader_save_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) { unsigned int *cmd = tmp_ctx.cmd; unsigned int *save, *restore, *fixup; @@ -1219,45 +1224,22 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { - int result; - - /* Allocate vmalloc memory to store the gpustate */ - result = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); - - if (result) - return result; - drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; - /* Blank out h/w register, constant, and command buffer shadows. */ - kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); - - /* set-up command and vertex buffer pointers */ - tmp_ctx.cmd = tmp_ctx.start - = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); - /* build indirect command buffers to save & restore regs/constants */ - adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); build_regrestore_cmds(adreno_dev, drawctxt); build_regsave_cmds(adreno_dev, drawctxt); - build_shader_save_restore_cmds(drawctxt); + build_shader_save_restore_cmds(adreno_dev, drawctxt); - kgsl_cache_range_op(&drawctxt->gpustate, - KGSL_CACHE_OP_FLUSH); - - kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, - drawctxt->gpustate.gpuaddr, - drawctxt->gpustate.size, false); return 0; } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { int result; @@ -1284,6 +1266,7 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, &tmp_ctx.cmd); /* build TP0_CHICKEN register restore command buffer */ + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); /* build indirect command buffers to save & restore gmem */ @@ -1309,7 +1292,61 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, return 0; } -static void a2xx_ctxt_save(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more then just storage for + * the gpustate. 
This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, + CONTEXT_SIZE); + + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) { + ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + } + + /* Flush and sync the gpustate memory */ + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a2xx_drawctxt_save(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1321,25 +1358,28 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, KGSL_CTXT_WARN(device, "Current active context has caused gpu hang\n"); - KGSL_CTXT_INFO(device, - "active context flags %08x\n", context->flags); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_save, 3); if (context->flags & CTXT_FLAGS_SHADER_SAVE) { /* save shader partitioning and instructions. */ - adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); - /* fixup shader partitioning parameter for + /* + * fixup shader partitioning parameter for * SET_SHADER_BASES. 
*/ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->shader_fixup, 3); context->flags |= CTXT_FLAGS_SHADER_RESTORE; } + } if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { @@ -1350,14 +1390,16 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, context->context_gmem_shadow.gmem_save, 3); /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags |= CTXT_FLAGS_GMEM_RESTORE; } } -static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, +static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1377,7 +1419,7 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, cmds[3] = device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); cmds[4] = (unsigned int) context; - adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); #ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP @@ -1393,27 +1435,34 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); /* restore shader instructions & partitioning. 
*/ if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); } + } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; - adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + cmds, 2); } } @@ -1610,10 +1659,9 @@ static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) } struct adreno_gpudev adreno_a2xx_gpudev = { - .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, - .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, - .ctxt_save = a2xx_ctxt_save, - .ctxt_restore = a2xx_ctxt_restore, + .ctxt_create = a2xx_drawctxt_create, + .ctxt_save = a2xx_drawctxt_save, + .ctxt_restore = a2xx_drawctxt_restore, .irq_handler = a2xx_irq_handler, .irq_control = a2xx_irq_control, }; diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index b192b862..218c4c09 100755 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -139,27 +139,19 @@ int adreno_drawctxt_create(struct kgsl_device *device, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; - /* FIXME: Deal with preambles */ + if (flags & KGSL_CONTEXT_PREAMBLE) + drawctxt->flags |= CTXT_FLAGS_PREAMBLE; - ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt); + if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC) + drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; + + ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt); if (ret) - goto err; - - /* Save the shader instruction memory on context switching */ - drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; - - if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) { - /* create gmem shadow */ - ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev, - drawctxt); - if (ret != 0) goto err; - } context->devctxt = drawctxt; return 0; err: - kgsl_sharedmem_free(&drawctxt->gpustate); kfree(drawctxt); return ret; } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h old mode 100644 new mode 100755 index 3c3a8536..26ff5344 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -30,12 +30,16 @@ #define CTXT_FLAGS_GMEM_SAVE 0x00000200 /* gmem can be restored from shadow */ #define CTXT_FLAGS_GMEM_RESTORE 0x00000400 +/* preamble packed in cmdbuffer for context switching */ +#define CTXT_FLAGS_PREAMBLE 0x00000800 /* shader must be copied to shadow */ #define CTXT_FLAGS_SHADER_SAVE 0x00002000 /* shader can be restored from shadow */ #define CTXT_FLAGS_SHADER_RESTORE 0x00004000 /* Context has caused a GPU hang */ #define CTXT_FLAGS_GPU_HANG 0x00008000 +/* Specifies there is no need to save GMEM */ +#define CTXT_FLAGS_NOGMEMALLOC 0x00010000 struct kgsl_device; struct adreno_device; diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c old mode 100644 new mode 100755 index d59057c8..e2580171 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. 
All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -391,7 +391,6 @@ void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) if (rb->flags & KGSL_FLAGS_STARTED) { /* ME_HALT */ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000); - rb->flags &= ~KGSL_FLAGS_STARTED; } } @@ -560,6 +559,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int *cmds; unsigned int i; struct adreno_context *drawctxt; + unsigned int start_index = 0; if (device->state & KGSL_STATE_HUNG) return -EBUSY; @@ -582,7 +582,16 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, " submission, size %x\n", numibs * 3); return -ENOMEM; } - for (i = 0; i < numibs; i++) { + + /*When preamble is enabled, the preamble buffer with state restoration + commands are stored in the first node of the IB chain. We can skip that + if a context switch hasn't occured */ + + if (drawctxt->flags & CTXT_FLAGS_PREAMBLE && + adreno_dev->drawctxt_active == drawctxt) + start_index = 1; + + for (i = start_index; i < numibs; i++) { (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); @@ -739,8 +748,20 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); - BUG_ON((copy_rb_contents == 0) && - (value == cur_context)); + + /* + * If other context switches were already lost and + * and the current context is the one that is hanging, + * then we cannot recover. Print an error message + * and leave. + */ + + if ((copy_rb_contents == 0) && (value == cur_context)) { + KGSL_DRV_ERR(device, "GPU recovery could not " + "find the previous context\n"); + return -EINVAL; + } + /* * If we were copying the commands and got to this point * then we need to remove the 3 commands that appear diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h old mode 100644 new mode 100755 index 56e6cc6b..712bb438 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -42,6 +42,7 @@ #define KGSL_CONTEXT_NO_GMEM_ALLOC 2 #define KGSL_CONTEXT_SUBMIT_IB_LIST 4 #define KGSL_CONTEXT_CTX_SWITCH 8 +#define KGSL_CONTEXT_PREAMBLE 16 /* Memory allocayion flags */ #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000 From 8be096244d302cd4fde3565cf90747d586f53bdb Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 19 Jun 2012 21:46:18 +0800 Subject: [PATCH 028/155] msm: kgsl: Fix when GMEM is saved for A2xx Saving GMEM is set when doing context switching and should not be set when creating the gmem shadow. --- drivers/gpu/msm/adreno_a2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index c0a0a59a..9e167c65 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1254,8 +1254,8 @@ static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, if (result) return result; - /* we've allocated the shadow, when swapped out, GMEM must be saved. */ - drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + /* set the gmem shadow flag for the context */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW; /* blank out gmem shadow. 
*/ kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, From c5ac3240a5ff68b531c91f36106e0e91086a0c4d Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 19 Jun 2012 23:30:34 +0800 Subject: [PATCH 029/155] msm: kgsl: improve postmortem and cff bounds checking Some hangs are fooling the postmortem dump code into running off the end of a buffer. Fix this by making its bounds check logic work better by reusing the logic from kgsl_find_region(). --- drivers/gpu/msm/adreno.c | 45 ++++++++++++++++------------- drivers/gpu/msm/adreno.h | 9 ++++-- drivers/gpu/msm/adreno_postmortem.c | 23 ++++++++------- drivers/gpu/msm/kgsl.c | 23 +-------------- drivers/gpu/msm/kgsl.h | 18 ++++++++---- drivers/gpu/msm/kgsl_cffdump.c | 20 ++++--------- 6 files changed, 64 insertions(+), 74 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/adreno.c mode change 100644 => 100755 drivers/gpu/msm/adreno_postmortem.c mode change 100644 => 100755 drivers/gpu/msm/kgsl.c mode change 100644 => 100755 drivers/gpu/msm/kgsl.h mode change 100644 => 100755 drivers/gpu/msm/kgsl_cffdump.c diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c old mode 100644 new mode 100755 index 61f14a4a..39f3004d --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -918,29 +918,25 @@ static int adreno_suspend_context(struct kgsl_device *device) return status; } -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size) +const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size) { - uint8_t *result = NULL; + struct kgsl_memdesc *result = NULL; struct kgsl_mem_entry *entry; struct kgsl_process_private *priv; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->buffer_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size)) + return &ringbuffer->buffer_desc; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->memptrs_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr, size)) + return &ringbuffer->memptrs_desc; - if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&device->memstore, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size)) + return &device->memstore; mutex_lock(&kgsl_driver.process_mutex); list_for_each_entry(priv, &kgsl_driver.process_list, list) { @@ -950,8 +946,7 @@ uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, entry = kgsl_sharedmem_find_region(priv, gpuaddr, sizeof(unsigned int)); if (entry) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); + result = &entry->memdesc; spin_unlock(&priv->mem_lock); mutex_unlock(&kgsl_driver.process_mutex); return result; @@ -962,14 +957,24 @@ uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, BUG_ON(!mutex_is_locked(&device->mutex)); list_for_each_entry(entry, &device->memqueue, list) { - if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr)) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + result = &entry->memdesc; break; } } return result; + +} + +uint8_t 
*adreno_convertaddr(struct kgsl_device *device, unsigned int pt_base, + unsigned int gpuaddr, unsigned int size) +{ + const struct kgsl_memdesc *memdesc; + + memdesc = adreno_find_region(device, pt_base, gpuaddr, size); + + return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL; } void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index e0857e05..40238313 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -83,8 +83,13 @@ void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size); +const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size); + +uint8_t *adreno_convertaddr(struct kgsl_device *device, + unsigned int pt_base, unsigned int gpuaddr, unsigned int size); static inline int adreno_is_a200(struct adreno_device *adreno_dev) { diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c old mode 100644 new mode 100755 index 3d957f69..b9b97377 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -247,9 +247,8 @@ static void adreno_dump_regs(struct kgsl_device *device, static void dump_ib(struct kgsl_device *device, char* buffId, uint32_t pt_base, uint32_t base_offset, uint32_t ib_base, uint32_t ib_size, bool dump) { - unsigned int memsize; - uint8_t *base_addr = kgsl_sharedmem_convertaddr(device, pt_base, - ib_base, &memsize); + uint8_t *base_addr = adreno_convertaddr(device, pt_base, + ib_base, ib_size*sizeof(uint32_t)); if (base_addr && dump) print_hex_dump(KERN_ERR, buffId, DUMP_PREFIX_OFFSET, @@ -277,14 +276,13 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, int i, j; uint32_t value; uint32_t *ib1_addr; - unsigned int memsize; dump_ib(device, "IB1:", pt_base, base_offset, ib1_base, ib1_size, dump); /* fetch virtual address for given IB base */ - ib1_addr = (uint32_t *)kgsl_sharedmem_convertaddr(device, pt_base, - ib1_base, &memsize); + ib1_addr = (uint32_t *)adreno_convertaddr(device, pt_base, + ib1_base, ib1_size*sizeof(uint32_t)); if (!ib1_addr) return; @@ -466,7 +464,7 @@ static int adreno_dump(struct kgsl_device *device) const uint32_t *rb_vaddr; int num_item = 0; int read_idx, write_idx; - unsigned int ts_processed, rb_memsize; + unsigned int ts_processed; static struct ib_list ib_list; @@ -681,11 +679,16 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "RB: rd_addr:%8.8x rb_size:%d num_item:%d\n", cp_rb_base, rb_count<<2, num_item); - rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, - cur_pt_base, cp_rb_base, &rb_memsize); + + if (adreno_dev->ringbuffer.buffer_desc.gpuaddr != cp_rb_base) + KGSL_LOG_POSTMORTEM_WRITE(device, + "rb address mismatch, should be 0x%08x\n", + adreno_dev->ringbuffer.buffer_desc.gpuaddr); + + rb_vaddr = adreno_dev->ringbuffer.buffer_desc.hostptr; if (!rb_vaddr) { KGSL_LOG_POSTMORTEM_WRITE(device, - "Can't fetch vaddr for CP_RB_BASE\n"); + "rb has no kernel mapping!\n"); goto error_vfree; } diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c old mode 100644 new mode 100755 index e21ca09c..02fbbd94 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -758,9 +758,7 @@ 
kgsl_sharedmem_find_region(struct kgsl_process_private *private, BUG_ON(private == NULL); list_for_each_entry(entry, &private->mem_list, list) { - if (gpuaddr >= entry->memdesc.gpuaddr && - ((gpuaddr + size) <= - (entry->memdesc.gpuaddr + entry->memdesc.size))) { + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { result = entry; break; } @@ -770,20 +768,6 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, } EXPORT_SYMBOL(kgsl_sharedmem_find_region); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size) -{ - BUG_ON(memdesc->hostptr == NULL); - - if (memdesc->gpuaddr == 0 || (gpuaddr < memdesc->gpuaddr || - gpuaddr >= memdesc->gpuaddr + memdesc->size)) - return NULL; - - *size = memdesc->size - (gpuaddr - memdesc->gpuaddr); - return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); -} -EXPORT_SYMBOL(kgsl_gpuaddr_to_vaddr); - /*call all ioctl sub functions with driver locked*/ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -1608,11 +1592,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, result = -EINVAL; goto done; } - if (!entry->memdesc.hostptr) - entry->memdesc.hostptr = - kgsl_gpuaddr_to_vaddr(&entry->memdesc, - param->gpuaddr, &entry->memdesc.size); - if (!entry->memdesc.hostptr) { KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n", param->gpuaddr); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h old mode 100644 new mode 100755 index e26cdc9e..968f2b11 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -139,8 +139,6 @@ struct kgsl_mem_entry { #endif void kgsl_mem_entry_destroy(struct kref *kref); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size); struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); @@ -169,14 +167,24 @@ static inline void kgsl_drm_exit(void) #endif static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr) + unsigned int gpuaddr, unsigned int size) { - if (gpuaddr >= memdesc->gpuaddr && (gpuaddr + sizeof(unsigned int)) <= - (memdesc->gpuaddr + memdesc->size)) { + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { return 1; } return 0; } +static inline uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, + unsigned int gpuaddr) +{ + if (memdesc->hostptr == NULL || memdesc->gpuaddr == 0 || + (gpuaddr < memdesc->gpuaddr || + gpuaddr >= memdesc->gpuaddr + memdesc->size)) + return NULL; + + return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); +} static inline int timestamp_cmp(unsigned int new, unsigned int old) { diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c old mode 100644 new mode 100755 index aa33152c..4d5de540 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -401,8 +401,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, bool clean_cache) { const void *src; - uint host_size; - uint physaddr; if (!kgsl_cff_dump_enable) return; @@ -422,13 +420,9 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, } memdesc = &entry->memdesc; } - BUG_ON(memdesc->gpuaddr == 0); - BUG_ON(gpuaddr == 0); - physaddr = kgsl_get_realaddr(memdesc) + (gpuaddr - memdesc->gpuaddr); - - src = kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (src == NULL || host_size < 
sizebytes) { - KGSL_CORE_ERR("did not find mapping for " + src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); + if (memdesc->hostptr == NULL) { + KGSL_CORE_ERR("no kernel mapping for " "gpuaddr: 0x%08x, m->host: 0x%p, phys: 0x%08x\n", gpuaddr, memdesc->hostptr, memdesc->physaddr); return; @@ -444,7 +438,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, KGSL_CACHE_OP_INV); } - BUG_ON(physaddr > 0x66000000 && physaddr < 0x66ffffff); while (sizebytes > 3) { cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src, 0, 0, 0); @@ -462,7 +455,6 @@ void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes) if (!kgsl_cff_dump_enable) return; - BUG_ON(addr > 0x66000000 && addr < 0x66ffffff); while (sizebytes > 3) { /* Use 32bit memory writes as long as there's at least * 4 bytes left */ @@ -575,7 +567,6 @@ bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, { static uint level; /* recursion level */ bool ret = true; - uint host_size; uint *hostaddr, *hoststart; int dwords_left = sizedwords; /* dwords left in the current command buffer */ @@ -596,10 +587,9 @@ bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, } memdesc = &entry->memdesc; } - - hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); + hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); if (hostaddr == NULL) { - KGSL_CORE_ERR("did not find mapping for " + KGSL_CORE_ERR("no kernel mapping for " "gpuaddr: 0x%08x\n", gpuaddr); return true; } From fd5e7d82376f1d78e7e8202a968fcefb505be51d Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 20 Jun 2012 12:25:12 +0800 Subject: [PATCH 030/155] msm: kgsl: let postmortem dump find context switch IBs Because the IBs used for context switching are not allocated by userspace, a separate search is needed to find them in adreno_find_region. 
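For illustration, a minimal sketch of that extra search (simplified from the hunk below, not part of the original patch): walk the context idr, keep only contexts whose pagetable matches the faulting pt_base, and bounds-check the two per-context buffers that hold context-switch commands.

	/* Illustrative sketch only: context-switch IBs live in the per-context
	 * gpustate and GMEM shadow buffers, so those are the only candidates
	 * checked here. */
	static const struct kgsl_memdesc *
	find_ctxt_region(struct kgsl_device *device, unsigned int pt_base,
			 unsigned int gpuaddr, unsigned int size)
	{
		struct kgsl_context *context;
		int next = 0;

		while ((context = idr_get_next(&device->context_idr, &next)) != NULL) {
			struct adreno_context *actx = context->devctxt;

			if (kgsl_mmu_pt_equal(actx->pagetable, pt_base)) {
				if (kgsl_gpuaddr_in_memdesc(&actx->gpustate,
							    gpuaddr, size))
					return &actx->gpustate;
				if (kgsl_gpuaddr_in_memdesc(
						&actx->context_gmem_shadow.gmemshadow,
						gpuaddr, size))
					return &actx->context_gmem_shadow.gmemshadow;
			}
			next++;
		}

		return NULL;
	}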
--- drivers/gpu/msm/adreno.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 39f3004d..58fee45f 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -928,6 +928,8 @@ const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, struct kgsl_process_private *priv; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer; + struct kgsl_context *context; + int next = 0; if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size)) return &ringbuffer->buffer_desc; @@ -959,11 +961,38 @@ const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, list_for_each_entry(entry, &device->memqueue, list) { if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { result = &entry->memdesc; - break; + return result; } } - return result; + + while (1) { + struct adreno_context *adreno_context = NULL; + context = idr_get_next(&device->context_idr, &next); + if (context == NULL) + break; + + adreno_context = (struct adreno_context *)context->devctxt; + + if (kgsl_mmu_pt_equal(adreno_context->pagetable, pt_base)) { + struct kgsl_memdesc *desc; + + desc = &adreno_context->gpustate; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; + } + + desc = &adreno_context->context_gmem_shadow.gmemshadow; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; + } + } + next = next + 1; + } + + return NULL; } From 73aff24078b05e8c51ec3e4230857ca11679440c Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 20 Jun 2012 12:39:35 +0800 Subject: [PATCH 031/155] msm: kgsl: fix size checking in adreno_find_region This function is supposed to return the memdesc that contains the range gpuaddr to gpuaddr + size. One of the lookups was using sizeof(unsigned int) instead of size, which could cause false positive results from this function and possibly kernel panics in the snapshot or postmortem code, which rely on it to do bounds checking for them. --- drivers/gpu/msm/adreno.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 58fee45f..0fddc996 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -945,8 +945,7 @@ const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base)) continue; spin_lock(&priv->mem_lock); - entry = kgsl_sharedmem_find_region(priv, gpuaddr, - sizeof(unsigned int)); + entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); if (entry) { result = &entry->memdesc; spin_unlock(&priv->mem_lock); From ae32a212a5144b06db0e5c017e753368cdd3f76a Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 21 Jun 2012 00:02:15 +0800 Subject: [PATCH 032/155] msm: kgsl: fix error handling in adreno_waittimestamp() This function was incorrectly reporting hangs when an error such as ERESTARTSYS was returned by __wait_event_interruptible_timeout(). msm: kgsl: Make sure WPTR reg is updated properly Sometimes writes to WPTR register do not take effect, causing a 3D core hang. Make sure the WPTR is updated properly when waiting. msm: kgsl: Set default value of wait_timeout in the adreno_dev struct Set the initalization value of wait_timeout at compile time in the declaration of the adreno_device struct instead of at runtime in adreno_probe. 
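To make the retry arithmetic below easier to follow, here is a small sketch (illustrative only, not part of the original patch) of how the overall timeout is split so the WPTR register can be written again between shorter waits.

	/* With the 10000 ms default wait_timeout this yields a 100 ms first
	 * wait followed by four 2475 ms waits; the WPTR is poked again before
	 * each retry so a dropped register write cannot stall the whole wait. */
	static void split_wait_timeout(unsigned int msecs,
				       unsigned int *msecs_first,
				       unsigned int *msecs_part)
	{
		*msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100;
		*msecs_part = (msecs - *msecs_first + 3) / 4;
	}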
--- drivers/gpu/msm/adreno.c | 61 +++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 0fddc996..1d0b98b6 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -120,6 +120,7 @@ static struct adreno_device device_3d0 = { }, .pfp_fw = NULL, .pm4_fw = NULL, + .wait_timeout = 10000, /* in milliseconds */ }; /* @@ -432,8 +433,6 @@ adreno_probe(struct platform_device *pdev) adreno_dev = ADRENO_DEVICE(device); device->parentdev = &pdev->dev; - adreno_dev->wait_timeout = 10000; /* default value in milliseconds */ - init_completion(&device->recovery_gate); status = adreno_ringbuffer_init(device); @@ -834,6 +833,12 @@ static int adreno_getproperty(struct kgsl_device *device, return status; } +static inline void adreno_poke(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr); +} + /* Caller must hold the device mutex. */ int adreno_idle(struct kgsl_device *device, unsigned int timeout) { @@ -852,6 +857,7 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) retry: if (rb->flags & KGSL_FLAGS_STARTED) { do { + adreno_poke(device); GSL_RB_GET_READPTR(rb, &rb->rptr); if (time_after(jiffies, wait_time)) { KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n", @@ -1111,8 +1117,12 @@ static int adreno_waittimestamp(struct kgsl_device *device, { long status = 0; uint io = 1; + static uint io_cnt; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int retries; + unsigned int msecs_first; + unsigned int msecs_part; /* Don't wait forever, set a max value for now */ if (msecs == -1) @@ -1125,26 +1135,45 @@ static int adreno_waittimestamp(struct kgsl_device *device, status = -EINVAL; goto done; } + + /* Keep the first timeout as 100msecs before rewriting + * the WPTR. Less visible impact if the WPTR has not + * been updated properly. + */ + msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + for (retries = 0; retries < 5; retries++) { if (!kgsl_check_timestamp(device, timestamp)) { - if (pwr->active_pwrlevel) { - int low_pwrlevel = pwr->num_pwrlevels - - KGSL_PWRLEVEL_LOW_OFFSET; - if (pwr->active_pwrlevel == low_pwrlevel) - io = 0; - } + adreno_poke(device); +// the QSD8X50 don't support io_fraction ?? // SecureCRT 2012-06-20 +// io_cnt = (io_cnt + 1) % 100; +// if (io_cnt < +// pwr->pwrlevels[pwr->active_pwrlevel]. +// io_fraction) +// io = 0; mutex_unlock(&device->mutex); - /* We need to make sure that the process is placed in wait-q - * before its condition is called */ + /* We need to make sure that the process is + * placed in wait-q before its condition is called + */ status = kgsl_wait_event_interruptible_timeout( device->wait_queue, kgsl_check_interrupt_timestamp(device, timestamp), - msecs_to_jiffies(msecs), io); + msecs_to_jiffies(retries ? 
+ msecs_part : msecs_first), io); mutex_lock(&device->mutex); - if (status > 0) - status = 0; - else if (status == 0) { + if (status > 0) { + /*completed before the wait finished */ + status = 0; + goto done; + } else if (status < 0) { + /*an error occurred*/ + goto done; + } + /*this wait timed out*/ + } + } if (!kgsl_check_timestamp(device, timestamp)) { status = -ETIMEDOUT; KGSL_DRV_ERR(device, @@ -1162,8 +1191,8 @@ static int adreno_waittimestamp(struct kgsl_device *device, KGSL_TIMEOUT_DEFAULT)) status = 0; } - } - } + } else { + status = 0; } done: From 97dd7fe6b5d7f1d391e00bd3888cdb99323fd19a Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 21 Jun 2012 00:32:58 +0800 Subject: [PATCH 033/155] msm: kgsl: Add a constant for adreno_ringbuffer_issuecmds flags Use a #define constant instead of a bare constant for the flags parameter of adreno_ringbuffer_issuecmds. --- drivers/gpu/msm/adreno.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 1d0b98b6..69d69d64 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -1086,7 +1086,8 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, * get an interrupt */ cmds[0] = cp_type3_packet(CP_NOP, 1); cmds[1] = 0; - adreno_ringbuffer_issuecmds(device, 0, &cmds[0], 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + &cmds[0], 2); } mutex_unlock(&device->mutex); } From d319fcfbbdfcc3b53e047ee7884f2d7fdbbf1fa5 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 12:30:20 +0800 Subject: [PATCH 034/155] msm: kgsl: flush outer cache for alloc_page() pages The outer cache needs to be flushed for these pages after they are allocated so that the GPU and CPU have a consistent view of them. --- drivers/gpu/msm/kgsl_sharedmem.c | 2 ++ 1 file changed, 2 insertions(+) mode change 100644 => 100755 drivers/gpu/msm/kgsl_sharedmem.c diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c old mode 100644 new mode 100755 index a587c44a..5e62b20c --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -372,6 +372,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, } sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); } + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, + KGSL_CACHE_OP_FLUSH); kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); From aa5de9cfcb55ebd1cff8db62ab71c7afdbb6ec99 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 12:34:57 +0800 Subject: [PATCH 035/155] msm: kgsl: increase valid timestamp range The existing timestamp_cmp function returns a different result depending on the order of the input parameters due to having an asymetric valid window. When no rollover is detected the window is 2^31 but when a rollover is detected the window is 25000. This change makes the rollover window symmetric at 2^31. --- drivers/gpu/msm/kgsl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 968f2b11..324b6b65 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -24,6 +24,9 @@ #define KGSL_NAME "kgsl" +/* Timestamp window used to detect rollovers */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + /*cache coherency ops */ #define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002 @@ -193,7 +196,7 @@ static inline int timestamp_cmp(unsigned int new, unsigned int old) if (ts_diff == 0) return 0; - return ((ts_diff > 0) || (ts_diff < -20000)) ? 
1 : -1; + return ((ts_diff > 0) || (ts_diff < -KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; } static inline void From 69555a62d145b3d171b3f92ab5a80f00bbdf4f88 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 12:46:57 +0800 Subject: [PATCH 036/155] msm: kgsl: Poke regularly in adreno_idle Poking once during adreno_idle is not enough; a GPU hang may still happen. Seen on 7x27A. Write a few times during the wait timeout, to ensure that the WPTR is updated properly. --- drivers/gpu/msm/adreno.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 69d69d64..711c6783 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -847,7 +847,11 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) unsigned int rbbm_status; unsigned long wait_timeout = msecs_to_jiffies(adreno_dev->wait_timeout); - unsigned long wait_time = jiffies + wait_timeout; + unsigned long wait_time; + unsigned long wait_time_part; + unsigned int msecs; + unsigned int msecs_first; + unsigned int msecs_part; kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2, 0x00000000, 0x80000000); @@ -856,8 +860,18 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) */ retry: if (rb->flags & KGSL_FLAGS_STARTED) { + msecs = adreno_dev->wait_timeout; + msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + wait_time = jiffies + wait_timeout; + wait_time_part = jiffies + msecs_to_jiffies(msecs_first); + adreno_poke(device); do { - adreno_poke(device); + if (time_after(jiffies, wait_time_part)) { + adreno_poke(device); + wait_time_part = jiffies + + msecs_to_jiffies(msecs_part); + } GSL_RB_GET_READPTR(rb, &rb->rptr); if (time_after(jiffies, wait_time)) { KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n", From dcf924f072d390a1bb05f19b7c37410721abeabe Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 12:54:12 +0800 Subject: [PATCH 037/155] msm: kgsl: Add a new property to IOCTL_KGSL_DEVICE_GETPROPERTY Return the reset status of the GPU unit when IOCTL_KGSL_DEVICE_GETPROPERTY is called with type KGSL_PROP_GPU_RESET_STAT --- drivers/gpu/msm/adreno.c | 20 ++++++++++++++++++++ drivers/gpu/msm/kgsl.c | 34 ++++++++++++++++++++++++++++++++++ drivers/gpu/msm/kgsl_device.h | 5 +++++ include/linux/msm_kgsl.h | 23 ++++++++++++++++++++++- 4 files changed, 81 insertions(+), 1 deletion(-) mode change 100644 => 100755 drivers/gpu/msm/kgsl_device.h diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 711c6783..a632aad9 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -623,6 +623,8 @@ adreno_recover_hang(struct kgsl_device *device) unsigned int soptimestamp; unsigned int eoptimestamp; struct adreno_context *drawctxt; + struct kgsl_context *context; + int next = 0; KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n"); rb_buffer = vmalloc(rb->buffer_desc.size); @@ -691,6 +693,24 @@ adreno_recover_hang(struct kgsl_device *device) drawctxt->flags |= CTXT_FLAGS_GPU_HANG; + /* + * Set the reset status of all contexts to + * INNOCENT_CONTEXT_RESET_EXT except for the bad context + * since thats the guilty party + */ + while ((context = idr_get_next(&device->context_idr, &next))) { + if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT != + context->reset_status) { + if (context->devctxt != drawctxt) + context->reset_status = + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; + else + context->reset_status = + 
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; + } + next = next + 1; + } + /* Restore valid commands in ringbuffer */ adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents); rb->timestamp = timestamp; diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 02fbbd94..3f1b5d30 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -794,6 +794,40 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, break; } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_find_context(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + /* Clear reset status once its been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + break; + } default: result = dev_priv->device->ftbl->getproperty( dev_priv->device, param->type, diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h old mode 100644 new mode 100755 index 64d369eb..44c17bc5 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -185,6 +185,11 @@ struct kgsl_context { /* Pointer to the device specific context information */ void *devctxt; + /* + * Status indicating whether a gpu reset occurred and whether this + * context was responsible for causing it + */ + unsigned int reset_status; }; struct kgsl_process_private { diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index 712bb438..36357e08 100755 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -59,6 +59,24 @@ #define KGSL_FLAGS_RESERVED2 0x00000080 #define KGSL_FLAGS_SOFT_RESET 0x00000100 +/* Clock flags to show which clocks should be controled by a given platform */ +#define KGSL_CLK_SRC 0x00000001 +#define KGSL_CLK_CORE 0x00000002 +#define KGSL_CLK_IFACE 0x00000004 +#define KGSL_CLK_MEM 0x00000008 +#define KGSL_CLK_MEM_IFACE 0x00000010 +#define KGSL_CLK_AXI 0x00000020 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + #define KGSL_MAX_PWRLEVELS 5 #define KGSL_CONVERT_TO_MBPS(val) \ @@ -75,7 +93,9 @@ enum kgsl_deviceid { enum kgsl_user_mem_type { KGSL_USER_MEM_TYPE_PMEM = 0x00000000, KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, - KGSL_USER_MEM_TYPE_ADDR = 0x00000002 + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000004, }; struct kgsl_devinfo { @@ -133,6 +153,7 @@ enum kgsl_property_type { KGSL_PROP_MMU_ENABLE = 0x00000006, KGSL_PROP_INTERRUPT_WAITS = 0x00000007, KGSL_PROP_VERSION = 0x00000008, + KGSL_PROP_GPU_RESET_STAT = 0x00000009 }; struct kgsl_shadowprop { From 9d909cf27b4399f3967fef4c53e4d07eba37f3c7 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 13:01:23 +0800 Subject: [PATCH 038/155] msm: kgsl: Make sure kmemleak tool does not report incorrect mem leak. 
Certain memory allocations are not properly tracked by the kmemleak tool, which causes it to incorrectly report memory leaks. Notify the tool by using kmemleak_not_leak() to ignore these allocations so that incorrect leak reports are avoided. --- drivers/gpu/msm/kgsl_sharedmem.c | 6 ++++++ drivers/gpu/msm/kgsl_sharedmem.h | 3 +++ 2 files changed, 9 insertions(+) mode change 100644 => 100755 drivers/gpu/msm/kgsl_sharedmem.h diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 5e62b20c..09dbd7e8 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -361,6 +363,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, goto done; } + kmemleak_not_leak(memdesc->sg); + memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); @@ -435,6 +439,8 @@ kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, return -ENOMEM; } + kmemleak_not_leak(ptr); + protflags = GSL_PT_PAGE_RV; if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h old mode 100644 new mode 100755 index 61bcf05b..ae77aec2 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -16,6 +16,7 @@ #include #include +#include /* * Convert a page to a physical address */ @@ -90,6 +91,8 @@ memdesc_sg_phys(struct kgsl_memdesc *memdesc, if (memdesc->sg == NULL) return -ENOMEM; + kmemleak_not_leak(memdesc->sg); + memdesc->sglen = 1; sg_init_table(memdesc->sg, 1); sg_set_page(&memdesc->sg[0], page, size, 0); From 91bbe54c4f33c8aade3be5983eadeea8e3a29cf0 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 21 Jun 2012 13:41:21 +0800 Subject: [PATCH 039/155] msm: kgsl: Fixup per-process memory statistics Make the framework for reporting per-process memory statistics a little bit more generic. This should make it easier to keep track of more external memory sources as they are added. 
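A condensed sketch of the accounting model the hunks below introduce (illustrative only, not part of the original patch): every kgsl_mem_entry carries a KGSL_MEM_ENTRY_* type, and each process keeps a current total plus a high-water mark per type, which the per-process sysfs files then report.

	/* Per-type accounting helper as added below: 'cur' tracks live usage
	 * and 'max' the high-water mark; kgsl_mem_entry_destroy() subtracts
	 * the entry size from 'cur' again when the allocation goes away. */
	static inline void kgsl_process_add_stats(struct kgsl_process_private *priv,
						  unsigned int type, size_t size)
	{
		priv->stats[type].cur += size;
		if (priv->stats[type].max < priv->stats[type].cur)
			priv->stats[type].max = priv->stats[type].cur;
	}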
--- drivers/gpu/msm/kgsl.c | 57 +++++------ drivers/gpu/msm/kgsl.h | 13 ++- drivers/gpu/msm/kgsl_device.h | 19 ++-- drivers/gpu/msm/kgsl_sharedmem.c | 169 +++++++++++++++++++++---------- 4 files changed, 163 insertions(+), 95 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 3f1b5d30..626c66d8 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -121,18 +121,20 @@ kgsl_mem_entry_destroy(struct kref *kref) struct kgsl_mem_entry *entry = container_of(kref, struct kgsl_mem_entry, refcount); - size_t size = entry->memdesc.size; + + entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; + + if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) + kgsl_driver.stats.mapped -= entry->memdesc.size; kgsl_sharedmem_free(&entry->memdesc); - if (entry->memtype == KGSL_USER_MEMORY) - entry->priv->stats.user -= size; - else if (entry->memtype == KGSL_MAPPED_MEMORY) { - if (entry->file_ptr) - fput(entry->file_ptr); - - kgsl_driver.stats.mapped -= size; - entry->priv->stats.mapped -= size; + switch (entry->memtype) { + case KGSL_MEM_ENTRY_PMEM: + case KGSL_MEM_ENTRY_ASHMEM: + if (entry->priv_data) + fput(entry->priv_data); + break; } kfree(entry); @@ -585,11 +587,6 @@ kgsl_put_process_private(struct kgsl_device *device, if (--private->refcnt) goto unlock; - KGSL_MEM_INFO(device, - "Memory usage: user (%d/%d) mapped (%d/%d)\n", - private->stats.user, private->stats.user_max, - private->stats.mapped, private->stats.mapped_max); - kgsl_process_uninit_sysfs(private); list_del(&private->list); @@ -1203,13 +1200,12 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, param->gpuaddr = entry->memdesc.gpuaddr; - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); /* Process specific statistics */ - KGSL_STATS_ADD(len, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, len); kgsl_check_idle(dev_priv->device); return 0; @@ -1310,7 +1306,7 @@ static int kgsl_setup_phys_file(struct kgsl_mem_entry *entry, } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = size; @@ -1482,7 +1478,7 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, goto err; } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = ALIGN(size, PAGE_SIZE); entry->memdesc.hostptr = hostptr; @@ -1533,6 +1529,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_phys_file(entry, private->pagetable, param->fd, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_PMEM; break; case KGSL_USER_MEM_TYPE_ADDR: @@ -1549,6 +1546,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_hostptr(entry, private->pagetable, (void *) param->hostptr, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_USER; break; case KGSL_USER_MEM_TYPE_ASHMEM: @@ -1565,6 +1563,8 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_ashmem(entry, private->pagetable, param->fd, (void *) param->hostptr, param->len); + + entry->memtype = KGSL_MEM_ENTRY_ASHMEM; break; default: KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); @@ -1584,14 +1584,10 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, /* Adjust the returned value for a non 4k aligned offset */ param->gpuaddr = entry->memdesc.gpuaddr + 
(param->offset & ~PAGE_MASK); - entry->memtype = KGSL_MAPPED_MEMORY; - KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped, kgsl_driver.stats.mapped_max); - /* Statistics */ - KGSL_STATS_ADD(param->len, private->stats.mapped, - private->stats.mapped_max); + kgsl_process_add_stats(private, entry->memtype, param->len); kgsl_mem_entry_attach_process(entry, private); @@ -1599,8 +1595,8 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, return result; error_put_file_ptr: - if (entry->file_ptr) - fput(entry->file_ptr); + if (entry->priv_data) + fput(entry->priv_data); error: kfree(entry); @@ -1626,6 +1622,7 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, result = -EINVAL; goto done; } + if (!entry->memdesc.hostptr) { KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n", param->gpuaddr); @@ -1633,9 +1630,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, } kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN); - - /* Statistics - keep track of how many flushes each process does */ - private->stats.flushes++; done: spin_unlock(&private->mem_lock); return result; @@ -1658,12 +1652,11 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, param->size, param->flags); if (result == 0) { - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); param->gpuaddr = entry->memdesc.gpuaddr; - KGSL_STATS_ADD(entry->memdesc.size, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, param->size); } else kfree(entry); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 324b6b65..0c5eef57 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -104,9 +104,6 @@ struct kgsl_driver { extern struct kgsl_driver kgsl_driver; -#define KGSL_USER_MEMORY 1 -#define KGSL_MAPPED_MEMORY 2 - struct kgsl_pagetable; struct kgsl_memdesc_ops; @@ -123,11 +120,19 @@ struct kgsl_memdesc { struct kgsl_memdesc_ops *ops; }; +/* List of different memory entry types */ + +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_PMEM 1 +#define KGSL_MEM_ENTRY_ASHMEM 2 +#define KGSL_MEM_ENTRY_USER 3 +#define KGSL_MEM_ENTRY_MAX 4 + struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; int memtype; - struct file *file_ptr; + void *priv_data; struct list_head list; uint32_t free_timestamp; /* back pointer to private structure under whose context this diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index 44c17bc5..b7524d87 100755 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -199,15 +199,12 @@ struct kgsl_process_private { struct list_head mem_list; struct kgsl_pagetable *pagetable; struct list_head list; - struct kobject *kobj; + struct kobject kobj; struct { - unsigned int user; - unsigned int user_max; - unsigned int mapped; - unsigned int mapped_max; - unsigned int flushes; - } stats; + unsigned int cur; + unsigned int max; + } stats[KGSL_MEM_ENTRY_MAX]; }; struct kgsl_device_private { @@ -222,6 +219,14 @@ struct kgsl_power_stats { struct kgsl_device *kgsl_get_device(int dev_idx); +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, size_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + static inline void kgsl_regread(struct kgsl_device *device, unsigned int offsetwords, unsigned int *value) 
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 09dbd7e8..db190d25 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -23,6 +23,52 @@ #include "kgsl_device.h" #include "adreno_ringbuffer.h" +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + + +/** + * Given a kobj, find the process structure attached to it + */ + static struct kgsl_process_private * _get_priv_from_kobj(struct kobject *kobj) { @@ -43,87 +89,106 @@ _get_priv_from_kobj(struct kobject *kobj) return NULL; } -/* sharedmem / memory sysfs files */ +/** + * Show the current amount of memory allocated for the given memtype + */ static ssize_t -process_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) { + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].max); +} + + +static void mem_entry_sysfs_release(struct kobject *kobj) +{ +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); struct kgsl_process_private *priv; - unsigned int val = 0; + ssize_t ret; mutex_lock(&kgsl_driver.process_mutex); priv = _get_priv_from_kobj(kobj); - if (priv == NULL) { - mutex_unlock(&kgsl_driver.process_mutex); - return 0; - } - - if (!strncmp(attr->attr.name, "user", 4)) - val = priv->stats.user; - if (!strncmp(attr->attr.name, "user_max", 8)) - val = priv->stats.user_max; - if (!strncmp(attr->attr.name, "mapped", 6)) - val = priv->stats.mapped; - if (!strncmp(attr->attr.name, "mapped_max", 10)) - val = priv->stats.mapped_max; - if (!strncmp(attr->attr.name, "flushes", 7)) - val = priv->stats.flushes; + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; mutex_unlock(&kgsl_driver.process_mutex); - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return ret; } -#define KGSL_MEMSTAT_ATTR(_name, _show) \ - static struct kobj_attribute attr_##_name = \ - __ATTR(_name, 0444, _show, NULL) - -KGSL_MEMSTAT_ATTR(user, process_show); -KGSL_MEMSTAT_ATTR(user_max, process_show); -KGSL_MEMSTAT_ATTR(mapped, 
process_show); -KGSL_MEMSTAT_ATTR(mapped_max, process_show); -KGSL_MEMSTAT_ATTR(flushes, process_show); - -static struct attribute *process_attrs[] = { - &attr_user.attr, - &attr_user_max.attr, - &attr_mapped.attr, - &attr_mapped_max.attr, - &attr_flushes.attr, - NULL +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, }; -static struct attribute_group process_attr_group = { - .attrs = process_attrs, +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, + .default_attrs = NULL, + .release = mem_entry_sysfs_release +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), +#ifdef CONFIG_ANDROID_PMEM + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem), +#endif +#ifdef CONFIG_ASHMEM + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem), +#endif + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), }; void kgsl_process_uninit_sysfs(struct kgsl_process_private *private) { - /* Remove the sysfs entry */ - if (private->kobj) { - sysfs_remove_group(private->kobj, &process_attr_group); - kobject_put(private->kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); } + + kobject_put(&private->kobj); } void kgsl_process_init_sysfs(struct kgsl_process_private *private) { unsigned char name[16]; + int i, ret; - /* Add a entry to the sysfs device */ snprintf(name, sizeof(name), "%d", private->pid); - private->kobj = kobject_create_and_add(name, kgsl_driver.prockobj); - /* sysfs failure isn't fatal, just annoying */ - if (private->kobj != NULL) { - if (sysfs_create_group(private->kobj, &process_attr_group)) { - kobject_put(private->kobj); - private->kobj = NULL; - } + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) + return; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + /* We need to check the value of sysfs_create_file, but we + * don't really care if it passed or not */ + + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr); + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr); } } From a19d2698ccf67e96b4dc0da6c4ab8bdc42f35961 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 15:24:51 +0800 Subject: [PATCH 040/155] msm: kgsl: Add ION as an external memory source Allow ION buffers to be attached via IOCTL_KGSL_MAP_USER_MEM --- arch/arm/mach-msm/include/mach/ion.h | 23 ++ drivers/gpu/msm/kgsl.c | 233 ++++-------- drivers/gpu/msm/kgsl.h | 3 +- drivers/gpu/msm/kgsl_sharedmem.c | 3 + include/linux/ion.h | 548 +++++++++++++++++++++++++++ 5 files changed, 641 insertions(+), 169 deletions(-) create mode 100755 arch/arm/mach-msm/include/mach/ion.h create mode 100755 include/linux/ion.h diff --git a/arch/arm/mach-msm/include/mach/ion.h b/arch/arm/mach-msm/include/mach/ion.h new file mode 100755 index 00000000..4d12249d --- /dev/null +++ b/arch/arm/mach-msm/include/mach/ion.h @@ -0,0 +1,23 @@ +/** + * + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ + +#ifndef __MACH_ION_H_ +#define __MACH_ION_H_ + +enum ion_memory_types { + ION_EBI_TYPE, + ION_SMI_TYPE, +}; + +#endif diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 626c66d8..22132197 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,5 +1,4 @@ /* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -21,10 +20,10 @@ #include #include #include -#include #include #include +#include #include "kgsl.h" #include "kgsl_debugfs.h" @@ -45,62 +44,7 @@ module_param_named(mmutype, ksgl_mmu_type, charp, 0); MODULE_PARM_DESC(ksgl_mmu_type, "Type of MMU to be used for graphics. Valid values are 'iommu' or 'gpummu' or 'nommu'"); -#ifdef CONFIG_GENLOCK - -/** - * kgsl_add_event - Add a new timstamp event for the KGSL device - * @device - KGSL device for the new event - * @ts - the timestamp to trigger the event on - * @cb - callback function to call when the timestamp expires - * @priv - private data for the specific event type - * - * @returns - 0 on success or error code on failure - */ - -static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv) -{ - struct kgsl_event *event; - struct list_head *n; - unsigned int cur = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); - - if (cb == NULL) - return -EINVAL; - - /* Check to see if the requested timestamp has already fired */ - - if (timestamp_cmp(cur, ts) >= 0) { - cb(device, priv, cur); - return 0; - } - - event = kzalloc(sizeof(*event), GFP_KERNEL); - if (event == NULL) - return -ENOMEM; - - event->timestamp = ts; - event->priv = priv; - event->func = cb; - - /* Add the event in order to the list */ - - for (n = device->events.next ; n != &device->events; n = n->next) { - struct kgsl_event *e = - list_entry(n, struct kgsl_event, list); - - if (timestamp_cmp(e->timestamp, ts) > 0) { - list_add(&event->list, n->prev); - break; - } - } - - if (n == &device->events) - list_add_tail(&event->list, &device->events); - - return 0; -} -#endif +static struct ion_client *kgsl_ion_client; static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) @@ -127,6 +71,17 @@ kgsl_mem_entry_destroy(struct kref *kref) if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) kgsl_driver.stats.mapped -= entry->memdesc.size; + /* + * Ion takes care of freeing the sglist for us (how nice ) so + * unmap the dma before freeing the sharedmem so kgsl_sharedmem_free + * doesn't try to free it again + */ + + if (entry->memtype == KGSL_MEM_ENTRY_ION) { + ion_unmap_dma(kgsl_ion_client, entry->priv_data); + entry->memdesc.sg = NULL; + } + kgsl_sharedmem_free(&entry->memdesc); switch (entry->memtype) { @@ -135,6 +90,9 @@ kgsl_mem_entry_destroy(struct kref *kref) if (entry->priv_data) fput(entry->priv_data); break; + case KGSL_MEM_ENTRY_ION: + ion_free(kgsl_ion_client, entry->priv_data); + break; } kfree(entry); @@ -1502,6 +1460,51 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, } #endif +static int kgsl_setup_ion(struct kgsl_mem_entry *entry, + struct kgsl_pagetable *pagetable, int fd) +{ + struct ion_handle *handle; + struct scatterlist *s; + unsigned long flags; + + if (kgsl_ion_client == NULL) { + kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME); + if (kgsl_ion_client == 
NULL) + return -ENODEV; + } + + handle = ion_import_fd(kgsl_ion_client, fd); + if (IS_ERR_OR_NULL(handle)) + return PTR_ERR(handle); + + entry->memtype = KGSL_MEM_ENTRY_ION; + entry->priv_data = handle; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + + if (ion_handle_get_flags(kgsl_ion_client, handle, &flags)) + goto err; + + entry->memdesc.sg = ion_map_dma(kgsl_ion_client, handle, flags); + + if (IS_ERR_OR_NULL(entry->memdesc.sg)) + goto err; + + /* Calculate the size of the memdesc from the sglist */ + + entry->memdesc.sglen = 0; + + for (s = entry->memdesc.sg; s != NULL; s = sg_next(s)) { + entry->memdesc.size += s->length; + entry->memdesc.sglen++; + } + + return 0; +err: + ion_free(kgsl_ion_client, handle); + return -ENOMEM; +} + static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -1566,6 +1569,10 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, entry->memtype = KGSL_MEM_ENTRY_ASHMEM; break; + case KGSL_USER_MEM_TYPE_ION: + result = kgsl_setup_ion(entry, private->pagetable, + param->fd); + break; default: KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); break; @@ -1694,114 +1701,6 @@ static long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, return result; } -#ifdef CONFIG_GENLOCK -struct kgsl_genlock_event_priv { - struct genlock_handle *handle; - struct genlock *lock; -}; - -/** - * kgsl_genlock_event_cb - Event callback for a genlock timestamp event - * @device - The KGSL device that expired the timestamp - * @priv - private data for the event - * @timestamp - the timestamp that triggered the event - * - * Release a genlock lock following the expiration of a timestamp - */ - -static void kgsl_genlock_event_cb(struct kgsl_device *device, - void *priv, u32 timestamp) -{ - struct kgsl_genlock_event_priv *ev = priv; - int ret; - - ret = genlock_lock(ev->handle, GENLOCK_UNLOCK, 0, 0); - if (ret) - KGSL_CORE_ERR("Error while unlocking genlock: %d\n", ret); - - genlock_put_handle(ev->handle); - - kfree(ev); -} - -/** - * kgsl_add_genlock-event - Create a new genlock event - * @device - KGSL device to create the event on - * @timestamp - Timestamp to trigger the event - * @data - User space buffer containing struct kgsl_genlock_event_priv - * @len - length of the userspace buffer - * @returns 0 on success or error code on error - * - * Attack to a genlock handle and register an event to release the - * genlock lock when the timestamp expires - */ - -static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) -{ - struct kgsl_genlock_event_priv *event; - struct kgsl_timestamp_event_genlock priv; - int ret; - - if (len != sizeof(priv)) - return -EINVAL; - - if (copy_from_user(&priv, data, sizeof(priv))) - return -EFAULT; - - event = kzalloc(sizeof(*event), GFP_KERNEL); - - if (event == NULL) - return -ENOMEM; - - event->handle = genlock_get_handle_fd(priv.handle); - - if (IS_ERR(event->handle)) { - int ret = PTR_ERR(event->handle); - kfree(event); - return ret; - } - - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); - if (ret) - kfree(event); - - return ret; -} -#else -static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) -{ - return -EINVAL; -} -#endif - -/** - * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace - * @dev_priv - pointer to the private device structure - * @cmd - the ioctl cmd passed from kgsl_ioctl - * 
@data - the user data buffer from kgsl_ioctl - * @returns 0 on success or error code on failure - */ - -static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, - unsigned int cmd, void *data) -{ - struct kgsl_timestamp_event *param = data; - int ret; - - switch (param->type) { - case KGSL_TIMESTAMP_EVENT_GENLOCK: - ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len); - break; - default: - ret = -EINVAL; - } - - return ret; -} - typedef long (*kgsl_ioctl_func_t)(struct kgsl_device_private *, unsigned int, void *); @@ -1843,8 +1742,6 @@ static const struct { kgsl_ioctl_cff_syncmem, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, kgsl_ioctl_cff_user_event, 0), - KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, - kgsl_ioctl_timestamp_event, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 0c5eef57..0f8ff172 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -126,7 +126,8 @@ struct kgsl_memdesc { #define KGSL_MEM_ENTRY_PMEM 1 #define KGSL_MEM_ENTRY_ASHMEM 2 #define KGSL_MEM_ENTRY_USER 3 -#define KGSL_MEM_ENTRY_MAX 4 +#define KGSL_MEM_ENTRY_ION 4 +#define KGSL_MEM_ENTRY_MAX 5 struct kgsl_mem_entry { struct kref refcount; diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index db190d25..394e78c8 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -153,6 +153,9 @@ static struct mem_entry_stats mem_stats[] = { MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem), #endif MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, ion), +#endif }; void diff --git a/include/linux/ion.h b/include/linux/ion.h new file mode 100755 index 00000000..4b7b8b7d --- /dev/null +++ b/include/linux/ion.h @@ -0,0 +1,548 @@ +/* + * include/linux/ion.h + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_ION_H +#define _LINUX_ION_H + +#include +#include + + +struct ion_handle; +/** + * enum ion_heap_types - list of all possible types of heaps + * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc + * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc + * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved + * carveout heap, allocations are physically + * contiguous + * @ION_HEAP_END: helper for iterating over heaps + */ +enum ion_heap_type { + ION_HEAP_TYPE_SYSTEM, + ION_HEAP_TYPE_SYSTEM_CONTIG, + ION_HEAP_TYPE_CARVEOUT, + ION_HEAP_TYPE_CUSTOM, /* must be last so device specific heaps always + are at the end of this enum */ + ION_NUM_HEAPS, +}; + +#define ION_HEAP_SYSTEM_MASK (1 << ION_HEAP_TYPE_SYSTEM) +#define ION_HEAP_SYSTEM_CONTIG_MASK (1 << ION_HEAP_TYPE_SYSTEM_CONTIG) +#define ION_HEAP_CARVEOUT_MASK (1 << ION_HEAP_TYPE_CARVEOUT) + + +/** + * These are the only ids that should be used for Ion heap ids. + * The ids listed are the order in which allocation will be attempted + * if specified. 
Don't swap the order of heap ids unless you know what + * you are doing! + */ + +enum ion_heap_ids { + ION_HEAP_SYSTEM_ID, + ION_HEAP_SYSTEM_CONTIG_ID, + ION_HEAP_EBI_ID, + ION_HEAP_SMI_ID, + ION_HEAP_ADSP_ID, + ION_HEAP_AUDIO_ID, +}; + +#define ION_KMALLOC_HEAP_NAME "kmalloc" +#define ION_VMALLOC_HEAP_NAME "vmalloc" +#define ION_EBI1_HEAP_NAME "EBI1" +#define ION_ADSP_HEAP_NAME "adsp" +#define ION_SMI_HEAP_NAME "smi" + +#define CACHED 1 +#define UNCACHED 0 + +#define ION_CACHE_SHIFT 0 + +#define ION_SET_CACHE(__cache) ((__cache) << ION_CACHE_SHIFT) + +#define ION_IS_CACHED(__flags) ((__flags) & (1 << ION_CACHE_SHIFT)) + +#ifdef __KERNEL__ +#include +#include +struct ion_device; +struct ion_heap; +struct ion_mapper; +struct ion_client; +struct ion_buffer; + +/* This should be removed some day when phys_addr_t's are fully + plumbed in the kernel, and all instances of ion_phys_addr_t should + be converted to phys_addr_t. For the time being many kernel interfaces + do not accept phys_addr_t's that would have to */ +#define ion_phys_addr_t unsigned long + +/** + * struct ion_platform_heap - defines a heap in the given platform + * @type: type of the heap from ion_heap_type enum + * @id: unique identifier for heap. When allocating (lower numbers + * will be allocated from first) + * @name: used for debug purposes + * @base: base address of heap in physical memory if applicable + * @size: size of the heap in bytes if applicable + * + * Provided by the board file. + */ +struct ion_platform_heap { + enum ion_heap_type type; + unsigned int id; + const char *name; + ion_phys_addr_t base; + size_t size; + enum ion_memory_types memory_type; +}; + +/** + * struct ion_platform_data - array of platform heaps passed from board file + * @nr: number of structures in the array + * @heaps: array of platform_heap structions + * + * Provided by the board file in the form of platform data to a platform device. + */ +struct ion_platform_data { + int nr; + struct ion_platform_heap heaps[]; +}; + +#ifdef CONFIG_ION + +/** + * ion_client_create() - allocate a client and returns it + * @dev: the global ion device + * @heap_mask: mask of heaps this client can allocate from + * @name: used for debugging + */ +struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name); + +/** + * msm_ion_client_create - allocate a client using the ion_device specified in + * drivers/gpu/ion/msm/msm_ion.c + * + * heap_mask and name are the same as ion_client_create, return values + * are the same as ion_client_create. + */ + +struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name); + +/** + * ion_client_destroy() - free's a client and all it's handles + * @client: the client + * + * Free the provided client and all it's resources including + * any handles it is holding. + */ +void ion_client_destroy(struct ion_client *client); + +/** + * ion_alloc - allocate ion memory + * @client: the client + * @len: size of the allocation + * @align: requested allocation alignment, lots of hardware blocks have + * alignment requirements of some kind + * @flags: mask of heaps to allocate from, if multiple bits are set + * heaps will be tried in order from lowest to highest order bit + * + * Allocate memory in one of the heaps provided in heap mask and return + * an opaque handle to it. 
+ */ +struct ion_handle *ion_alloc(struct ion_client *client, size_t len, + size_t align, unsigned int flags); + +/** + * ion_free - free a handle + * @client: the client + * @handle: the handle to free + * + * Free the provided handle. + */ +void ion_free(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_phys - returns the physical address and len of a handle + * @client: the client + * @handle: the handle + * @addr: a pointer to put the address in + * @len: a pointer to put the length in + * + * This function queries the heap for a particular handle to get the + * handle's physical address. It't output is only correct if + * a heap returns physically contiguous memory -- in other cases + * this api should not be implemented -- ion_map_dma should be used + * instead. Returns -EINVAL if the handle is invalid. This has + * no implications on the reference counting of the handle -- + * the returned value may not be valid if the caller is not + * holding a reference. + */ +int ion_phys(struct ion_client *client, struct ion_handle *handle, + ion_phys_addr_t *addr, size_t *len); + +/** + * ion_map_kernel - create mapping for the given handle + * @client: the client + * @handle: handle to map + * @flags: flags for this mapping + * + * Map the given handle into the kernel and return a kernel address that + * can be used to access this address. If no flags are specified, this + * will return a non-secure uncached mapping. + */ +void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_kernel() - destroy a kernel mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_map_dma - create a dma mapping for a given handle + * @client: the client + * @handle: handle to map + * + * Return an sglist describing the given handle + */ +struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_dma() - destroy a dma mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_dma(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_share() - given a handle, obtain a buffer to pass to other clients + * @client: the client + * @handle: the handle to share + * + * Given a handle, return a buffer, which exists in a global name + * space, and can be passed to other clients. Should be passed into ion_import + * to obtain a new handle for this buffer. + * + * NOTE: This function does do not an extra reference. The burden is on the + * caller to make sure the buffer doesn't go away while it's being passed to + * another client. That is, ion_free should not be called on this handle until + * the buffer has been imported into the other client. + */ +struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle); + +/** + * ion_import() - given an buffer in another client, import it + * @client: this blocks client + * @buffer: the buffer to import (as obtained from ion_share) + * + * Given a buffer, add it to the client and return the handle to use to refer + * to it further. This is called to share a handle from one kernel client to + * another. 
+ */ +struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer); + +/** + * ion_import_fd() - given an fd obtained via ION_IOC_SHARE ioctl, import it + * @client: this blocks client + * @fd: the fd + * + * A helper function for drivers that will be recieving ion buffers shared + * with them from userspace. These buffers are represented by a file + * descriptor obtained as the return from the ION_IOC_SHARE ioctl. + * This function coverts that fd into the underlying buffer, and returns + * the handle to use to refer to it further. + */ +struct ion_handle *ion_import_fd(struct ion_client *client, int fd); + +/** + * ion_handle_get_flags - get the flags for a given handle + * + * @client - client who allocated the handle + * @handle - handle to get the flags + * @flags - pointer to store the flags + * + * Gets the current flags for a handle. These flags indicate various options + * of the buffer (caching, security, etc.) + */ +int ion_handle_get_flags(struct ion_client *client, struct ion_handle *handle, + unsigned long *flags); + +#else +static inline struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_client_destroy(struct ion_client *client) { } + +static inline struct ion_handle *ion_alloc(struct ion_client *client, + size_t len, size_t align, unsigned int flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_free(struct ion_client *client, + struct ion_handle *handle) { } + + +static inline int ion_phys(struct ion_client *client, + struct ion_handle *handle, ion_phys_addr_t *addr, size_t *len) +{ + return -ENODEV; +} + +static inline void *ion_map_kernel(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_kernel(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_dma(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import_fd(struct ion_client *client, + int fd) +{ + return ERR_PTR(-ENODEV); +} + +static inline int ion_handle_get_flags(struct ion_client *client, + struct ion_handle *handle, unsigned long *flags) +{ + return -ENODEV; +} +#endif /* CONFIG_ION */ +#endif /* __KERNEL__ */ + +/** + * DOC: Ion Userspace API + * + * create a client by opening /dev/ion + * most operations handled via following ioctls + * + */ + +/** + * struct ion_allocation_data - metadata passed from userspace for allocations + * @len: size of the allocation + * @align: required alignment of the allocation + * @flags: flags passed to heap + * @handle: pointer that will be populated with a cookie to use to refer + * to this allocation + * + * Provided by userspace as an argument to the ioctl + */ +struct ion_allocation_data { + size_t len; + size_t align; + unsigned int flags; + struct ion_handle *handle; +}; + +/** + * struct 
ion_fd_data - metadata passed to/from userspace for a handle/fd pair + * @handle: a handle + * @fd: a file descriptor representing that handle + * + * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with + * the handle returned from ion alloc, and the kernel returns the file + * descriptor to share or map in the fd field. For ION_IOC_IMPORT, userspace + * provides the file descriptor and the kernel returns the handle. + */ +struct ion_fd_data { + struct ion_handle *handle; + int fd; +}; + +/** + * struct ion_handle_data - a handle passed to/from the kernel + * @handle: a handle + */ +struct ion_handle_data { + struct ion_handle *handle; +}; + +/** + * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl + * @cmd: the custom ioctl function to call + * @arg: additional data to pass to the custom ioctl, typically a user + * pointer to a predefined structure + * + * This works just like the regular cmd and arg fields of an ioctl. + */ +struct ion_custom_data { + unsigned int cmd; + unsigned long arg; +}; + + +/* struct ion_flush_data - data passed to ion for flushing caches + * + * @handle: handle with data to flush + * @vaddr: userspace virtual address mapped with mmap + * @offset: offset into the handle to flush + * @length: length of handle to flush + * + * Performs cache operations on the handle. If p is the start address + * of the handle, p + offset through p + offset + length will have + * the cache operations performed + */ +struct ion_flush_data { + struct ion_handle *handle; + void *vaddr; + unsigned int offset; + unsigned int length; +}; + +/* struct ion_flag_data - information about flags for this buffer + * + * @handle: handle to get flags from + * @flags: flags of this handle + * + * Takes handle as an input and outputs the flags from the handle + * in the flag field. + */ +struct ion_flag_data { + struct ion_handle *handle; + unsigned long flags; +}; + +#define ION_IOC_MAGIC 'I' + +/** + * DOC: ION_IOC_ALLOC - allocate memory + * + * Takes an ion_allocation_data struct and returns it with the handle field + * populated with the opaque handle for the allocation. + */ +#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \ + struct ion_allocation_data) + +/** + * DOC: ION_IOC_FREE - free memory + * + * Takes an ion_handle_data struct and frees the handle. + */ +#define ION_IOC_FREE _IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data) + +/** + * DOC: ION_IOC_MAP - get a file descriptor to mmap + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be used as an argument to mmap. + */ +#define ION_IOC_MAP _IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data) + +/** + * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be passed to another process. The corresponding opaque handle can + * be retrieved via ION_IOC_IMPORT. 
+ */ +#define ION_IOC_SHARE _IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data) + +/** + * DOC: ION_IOC_IMPORT - imports a shared file descriptor + * + * Takes an ion_fd_data struct with the fd field populated with a valid file + * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle + * filed set to the corresponding opaque handle. + */ +#define ION_IOC_IMPORT _IOWR(ION_IOC_MAGIC, 5, int) + +/** + * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl + * + * Takes the argument of the architecture specific ioctl to call and + * passes appropriate userdata for that ioctl + */ +#define ION_IOC_CUSTOM _IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data) + + +/** + * DOC: ION_IOC_CLEAN_CACHES - clean the caches + * + * Clean the caches of the handle specified. + */ +#define ION_IOC_CLEAN_CACHES _IOWR(ION_IOC_MAGIC, 7, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_INV_CACHES - invalidate the caches + * + * Invalidate the caches of the handle specified. + */ +#define ION_IOC_INV_CACHES _IOWR(ION_IOC_MAGIC, 8, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_CLEAN_CACHES - clean and invalidate the caches + * + * Clean and invalidate the caches of the handle specified. + */ +#define ION_IOC_CLEAN_INV_CACHES _IOWR(ION_IOC_MAGIC, 9, \ + struct ion_flush_data) + +/** + * DOC: ION_IOC_GET_FLAGS - get the flags of the handle + * + * Gets the flags of the current handle which indicate cachability, + * secure state etc. + */ +#define ION_IOC_GET_FLAGS _IOWR(ION_IOC_MAGIC, 10, \ + struct ion_flag_data) +#endif /* _LINUX_ION_H */ From b4c5202beca6f249dc9d14817f0424cee9f122df Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 15:38:14 +0800 Subject: [PATCH 041/155] msm: kgsl: make cffdump work with the MMU enabled The tools that process cff dumps expect a linear memory region, but the start address of that region can be configured. As long as there is only a single pagetable (so that there aren't duplicate virtual addresses in the dump), dumps captured with the mmu on are easier to deal with than reconfiguring to turn the mmu off. --- drivers/gpu/msm/adreno.c | 11 +++++++---- drivers/gpu/msm/kgsl_cffdump.c | 12 +----------- drivers/gpu/msm/kgsl_mmu.c | 1 + drivers/gpu/msm/kgsl_sharedmem.c | 4 ++-- 4 files changed, 11 insertions(+), 17 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/kgsl_mmu.c diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index a632aad9..50c8f223 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -269,10 +269,13 @@ static void adreno_setstate(struct kgsl_device *device, int sizedwords = 0; unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ - /* If possible, then set the state via the command stream to avoid - a CPU idle. Otherwise, use the default setstate which uses register - writes */ - if (adreno_dev->drawctxt_active) { + /* + * If possible, then set the state via the command stream to avoid + * a CPU idle. 
Otherwise, use the default setstate which uses register + * writes For CFF dump we must idle and use the registers so that it is + * easier to filter out the mmu accesses from the dump + */ + if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) { if (flags & KGSL_MMUFLAGS_PTUPDATE) { /* wait for graphics pipe to be idle */ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c index 4d5de540..fb1ecac7 100755 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -231,8 +231,6 @@ static void cffdump_printline(int id, uint opcode, uint op1, uint op2, spin_lock(&cffdump_lock); if (opcode == CFF_OP_WRITE_MEM) { - if (op1 < 0x40000000 || op1 >= 0x60000000) - KGSL_CORE_ERR("addr out-of-range: op1=%08x", op1); if ((cff_op_write_membuf.addr != op1 && cff_op_write_membuf.count) || (cff_op_write_membuf.count == MEMBUF_SIZE)) @@ -360,15 +358,7 @@ void kgsl_cffdump_destroy() void kgsl_cffdump_open(enum kgsl_deviceid device_id) { - /*TODO: move this to where we can report correct gmemsize*/ - unsigned int va_base; - - if (cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930()) - va_base = 0x40000000; - else - va_base = 0x20000000; - - kgsl_cffdump_memory_base(device_id, va_base, + kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE, CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, SZ_256K); } diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c old mode 100644 new mode 100755 index 7916ecb0..82f33c06 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -23,6 +23,7 @@ #include "kgsl_mmu.h" #include "kgsl_device.h" #include "kgsl_sharedmem.h" +#include "adreno_postmortem.h" #define KGSL_MMU_ALIGN_SHIFT 13 #define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 394e78c8..8c4f7814 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -665,7 +665,7 @@ kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); BUG_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, src, sizeof(uint)); writel_relaxed(src, memdesc->hostptr + offsetbytes); return 0; @@ -679,7 +679,7 @@ kgsl_sharedmem_set(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); BUG_ON(offsetbytes + sizebytes > memdesc->size); - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, value, + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, value, sizebytes); memset(memdesc->hostptr + offsetbytes, value, sizebytes); return 0; From 376f66c119a01424d7f044b2e6bdef0bbafdeef2 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 16:08:12 +0800 Subject: [PATCH 042/155] msm: kgsl: convert sg allocation to vmalloc kmalloc allocates physically contiguous memory and may fail for larger allocations due to fragmentation. The large allocations are caused by the fact that the scatterlist structure is 24 bytes and the array size is proportional to the number of pages being mapped. 
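
For illustration only (not part of the diff below): a rough sketch of the
size math behind this change, assuming 4 KiB pages and the 24-byte
struct scatterlist entry mentioned above. The helper name is hypothetical.

    #include <linux/mm.h>
    #include <linux/scatterlist.h>
    #include <linux/vmalloc.h>

    /* Hypothetical helper mirroring the kmalloc -> vmalloc replacement. */
    static struct scatterlist *example_sg_alloc(size_t bufsize)
    {
            /* e.g. a 32 MiB buffer -> 8192 entries * 24 bytes ~= 192 KiB */
            unsigned int sglen = PAGE_ALIGN(bufsize) / PAGE_SIZE;

            /*
             * kmalloc() would have to find this array as one physically
             * contiguous block; vmalloc() only needs virtually contiguous
             * pages, so it keeps working when memory is fragmented.
             */
            return vmalloc(sglen * sizeof(struct scatterlist));
    }

An array allocated this way is released with vfree(), matching the
kfree() -> vfree() conversions in the hunks below.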
--- drivers/gpu/msm/adreno.c | 1 + drivers/gpu/msm/kgsl.c | 5 ++--- drivers/gpu/msm/kgsl_cffdump.c | 2 +- drivers/gpu/msm/kgsl_sharedmem.c | 4 ++-- drivers/gpu/msm/kgsl_sharedmem.h | 3 ++- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 50c8f223..adc3456a 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -72,6 +72,7 @@ | (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT)) static const struct kgsl_functable adreno_functable; +unsigned int kgsl_cff_dump_enable=0; static struct adreno_device device_3d0 = { .dev = { diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 22132197..7b6d4aa9 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1291,8 +1291,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; unsigned long paddr = (unsigned long) addr; - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), - GFP_KERNEL); + memdesc->sg = vmalloc(sglen * sizeof(struct scatterlist)); if (memdesc->sg == NULL) return -ENOMEM; @@ -1332,7 +1331,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, err: spin_unlock(¤t->mm->page_table_lock); - kfree(memdesc->sg); + vfree(memdesc->sg); memdesc->sg = NULL; return -EINVAL; diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c index fb1ecac7..945b535d 100755 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -20,7 +20,7 @@ #include #include #include -#include +//#include #include "kgsl.h" #include "kgsl_cffdump.h" diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 8c4f7814..17c26c5f 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -425,7 +425,7 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->ops = &kgsl_vmalloc_ops; memdesc->hostptr = (void *) ptr; - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + memdesc->sg = vmalloc(sglen * sizeof(struct scatterlist)); if (memdesc->sg == NULL) { ret = -ENOMEM; goto done; @@ -564,7 +564,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - kfree(memdesc->sg); + vfree(memdesc->sg); memset(memdesc, 0, sizeof(*memdesc)); } diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index ae77aec2..63e12e66 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -16,6 +16,7 @@ #include #include +#include #include /* @@ -87,7 +88,7 @@ memdesc_sg_phys(struct kgsl_memdesc *memdesc, { struct page *page = phys_to_page(physaddr); - memdesc->sg = kmalloc(sizeof(struct scatterlist) * 1, GFP_KERNEL); + memdesc->sg = vmalloc(sizeof(struct scatterlist) * 1); if (memdesc->sg == NULL) return -ENOMEM; From 47e6ec131b82fb5f2f243ad349563fd605c0f936 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 16:20:22 +0800 Subject: [PATCH 043/155] reverse the GENLOCK --- drivers/gpu/msm/kgsl.c | 168 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 7b6d4aa9..59dce83a 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,63 @@ MODULE_PARM_DESC(ksgl_mmu_type, static struct ion_client *kgsl_ion_client; +#ifdef CONFIG_GENLOCK + +/** + * kgsl_add_event - Add a new timstamp event 
for the KGSL device + * @device - KGSL device for the new event + * @ts - the timestamp to trigger the event on + * @cb - callback function to call when the timestamp expires + * @priv - private data for the specific event type + * + * @returns - 0 on success or error code on failure + */ + +static int kgsl_add_event(struct kgsl_device *device, u32 ts, + void (*cb)(struct kgsl_device *, void *, u32), void *priv) +{ + struct kgsl_event *event; + struct list_head *n; + unsigned int cur = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + + if (cb == NULL) + return -EINVAL; + + /* Check to see if the requested timestamp has already fired */ + + if (timestamp_cmp(cur, ts) >= 0) { + cb(device, priv, cur); + return 0; + } + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + event->timestamp = ts; + event->priv = priv; + event->func = cb; + + /* Add the event in order to the list */ + + for (n = device->events.next ; n != &device->events; n = n->next) { + struct kgsl_event *e = + list_entry(n, struct kgsl_event, list); + + if (timestamp_cmp(e->timestamp, ts) > 0) { + list_add(&event->list, n->prev); + break; + } + } + + if (n == &device->events) + list_add_tail(&event->list, &device->events); + + return 0; +} +#endif + static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) { @@ -1700,6 +1758,114 @@ static long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, return result; } +#ifdef CONFIG_GENLOCK +struct kgsl_genlock_event_priv { + struct genlock_handle *handle; + struct genlock *lock; +}; + +/** + * kgsl_genlock_event_cb - Event callback for a genlock timestamp event + * @device - The KGSL device that expired the timestamp + * @priv - private data for the event + * @timestamp - the timestamp that triggered the event + * + * Release a genlock lock following the expiration of a timestamp + */ + +static void kgsl_genlock_event_cb(struct kgsl_device *device, + void *priv, u32 timestamp) +{ + struct kgsl_genlock_event_priv *ev = priv; + int ret; + + ret = genlock_lock(ev->handle, GENLOCK_UNLOCK, 0, 0); + if (ret) + KGSL_CORE_ERR("Error while unlocking genlock: %d\n", ret); + + genlock_put_handle(ev->handle); + + kfree(ev); +} + +/** + * kgsl_add_genlock-event - Create a new genlock event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - User space buffer containing struct kgsl_genlock_event_priv + * @len - length of the userspace buffer + * @returns 0 on success or error code on error + * + * Attack to a genlock handle and register an event to release the + * genlock lock when the timestamp expires + */ + +static int kgsl_add_genlock_event(struct kgsl_device *device, + u32 timestamp, void __user *data, int len) +{ + struct kgsl_genlock_event_priv *event; + struct kgsl_timestamp_event_genlock priv; + int ret; + + if (len != sizeof(priv)) + return -EINVAL; + + if (copy_from_user(&priv, data, sizeof(priv))) + return -EFAULT; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + + if (event == NULL) + return -ENOMEM; + + event->handle = genlock_get_handle_fd(priv.handle); + + if (IS_ERR(event->handle)) { + int ret = PTR_ERR(event->handle); + kfree(event); + return ret; + } + + ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + if (ret) + kfree(event); + + return ret; +} +#else +static long kgsl_add_genlock_event(struct kgsl_device *device, + u32 timestamp, void __user *data, int len) +{ + return -EINVAL; +} +#endif + +/** + * 
kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_GENLOCK: + ret = kgsl_add_genlock_event(dev_priv->device, + param->timestamp, param->priv, param->len); + break; + default: + ret = -EINVAL; + } + + return ret; +} + typedef long (*kgsl_ioctl_func_t)(struct kgsl_device_private *, unsigned int, void *); @@ -1741,6 +1907,8 @@ static const struct { kgsl_ioctl_cff_syncmem, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, kgsl_ioctl_cff_user_event, 0), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) From 8c39724a756d86b16d02a0408e7b8f398e52d44b Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 16:48:37 +0800 Subject: [PATCH 044/155] remove zImage before compile --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 6886e590..3a29e816 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/sh KERNELBASEDIR=/ics/kernel/out - +rm arch/arm/boot/zImage make htcleo_defconfig make ARCH=arm CROSS_COMPILE=/home/securecrt/tools/arm-2010q1/bin/arm-none-eabi- zImage -j8 && make ARCH=arm CROSS_COMPILE=/home/securecrt/tools/arm-2010q1/bin/arm-none-eabi- modules -j8 From 361e591fe7f111f2aaee748bdc36c6eba901c3e1 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 22 Jun 2012 16:49:00 +0800 Subject: [PATCH 045/155] msm: kgsl: remove readl/writel use for dma memory For dma_alloc_coherent() you don't need writel/readl because it's just a plain old void *. Linux tries very hard to make a distinction between io memory (void __iomem *) and memory (void *) so that drivers are portable to architectures that don't have a way to access registers via pointer dereferences. You can see http://lwn.net/Articles/102232/ and the Linus rant http://lwn.net/Articles/102240/ here for more details behind the motivation. msm: kgsl: Allocate physical pages instead of using vmalloc Replace vmalloc allocation with physical page allocation. For most allocations we do not need a kernel virual address. vmalloc uses up the kernel virtual address space. By replacing vmalloc with physical page alloction and mapping that allocation to kernel space only when it is required prevents the kgsl driver from using unnecessary vmalloc virtual space. 
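
As a minimal illustration (the device pointer and helper name are assumed,
not part of the diff below): memory returned by dma_alloc_coherent() is
ordinary kernel memory, so plain loads and stores are enough, while
readl()/writel() are only required for void __iomem * register mappings.

    #include <linux/device.h>
    #include <linux/dma-mapping.h>
    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/types.h>

    static int example_coherent_rw(struct device *dev)
    {
            dma_addr_t dma;
            u32 val;
            u32 *buf = dma_alloc_coherent(dev, PAGE_SIZE, &dma, GFP_KERNEL);

            if (!buf)
                    return -ENOMEM;

            buf[0] = 0x12345678;    /* plain store, no writel() needed */
            val = buf[0];           /* plain load, no readl() needed   */

            dma_free_coherent(dev, PAGE_SIZE, buf, dma);
            return val == 0x12345678 ? 0 : -EIO;
    }

The on-demand kernel mapping side of the change is visible below in
kgsl_vmalloc_map_kernel(), which vmap()s the allocated pages only when a
kernel virtual address is actually needed.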
--- drivers/gpu/msm/adreno.c | 4 +- drivers/gpu/msm/adreno.h | 4 +- drivers/gpu/msm/adreno_ringbuffer.h | 4 +- drivers/gpu/msm/kgsl.c | 4 +- drivers/gpu/msm/kgsl.h | 21 +++- drivers/gpu/msm/kgsl_drm.c | 7 +- drivers/gpu/msm/kgsl_gpummu.c | 6 +- drivers/gpu/msm/kgsl_sharedmem.c | 146 ++++++++++++++++++++-------- drivers/gpu/msm/kgsl_sharedmem.h | 11 +-- 9 files changed, 144 insertions(+), 63 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/adreno_ringbuffer.h mode change 100644 => 100755 drivers/gpu/msm/kgsl_drm.c mode change 100644 => 100755 drivers/gpu/msm/kgsl_gpummu.c diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index adc3456a..739f2276 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -962,7 +962,7 @@ static int adreno_suspend_context(struct kgsl_device *device) return status; } -const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, unsigned int pt_base, unsigned int gpuaddr, unsigned int size) @@ -1042,7 +1042,7 @@ const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, uint8_t *adreno_convertaddr(struct kgsl_device *device, unsigned int pt_base, unsigned int gpuaddr, unsigned int size) { - const struct kgsl_memdesc *memdesc; + struct kgsl_memdesc *memdesc; memdesc = adreno_find_region(device, pt_base, gpuaddr, size); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 40238313..088511af 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -83,7 +83,7 @@ void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); -const struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, unsigned int pt_base, unsigned int gpuaddr, unsigned int size); diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h old mode 100644 new mode 100755 index 3e7a6880..4494a1bc --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -64,7 +64,7 @@ struct adreno_ringbuffer { #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ - writel_relaxed(data, ring); \ + *ring = data; \ wmb(); \ kgsl_cffdump_setmem(gpuaddr, data, 4); \ ring++; \ @@ -93,7 +93,7 @@ struct adreno_ringbuffer { #define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */ #define GSL_RB_GET_READPTR(rb, data) \ do { \ - *(data) = readl_relaxed(&(rb)->memptrs->rptr); \ + *(data) = rb->memptrs->rptr; \ } while (0) #else #define GSL_RB_CNTL_NO_UPDATE 0x1 /* disable */ diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 59dce83a..283e29a7 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1208,9 +1208,9 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - result = remap_vmalloc_range(vma, (void *) entry->memdesc.hostptr, 0); + result = kgsl_sharedmem_map_vma(vma, &entry->memdesc); if (result) { - KGSL_CORE_ERR("remap_vmalloc_range failed: %d\n", result); + KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result); goto 
error_free_vmalloc; } diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 0f8ff172..91b42ffe 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -21,6 +21,7 @@ #include #include #include +#include #define KGSL_NAME "kgsl" @@ -105,7 +106,15 @@ struct kgsl_driver { extern struct kgsl_driver kgsl_driver; struct kgsl_pagetable; -struct kgsl_memdesc_ops; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + int (*vmflags)(struct kgsl_memdesc *); + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel_mem)(struct kgsl_memdesc *); +}; /* shared memory allocation */ struct kgsl_memdesc { @@ -184,12 +193,14 @@ static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, } return 0; } -static inline uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, +static inline uint8_t *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, unsigned int gpuaddr) { - if (memdesc->hostptr == NULL || memdesc->gpuaddr == 0 || - (gpuaddr < memdesc->gpuaddr || - gpuaddr >= memdesc->gpuaddr + memdesc->size)) + if (memdesc->gpuaddr == 0 || + gpuaddr < memdesc->gpuaddr || + gpuaddr >= (memdesc->gpuaddr + memdesc->size) || + (NULL == memdesc->hostptr && memdesc->ops->map_kernel_mem && + memdesc->ops->map_kernel_mem(memdesc))) return NULL; return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c old mode 100644 new mode 100755 index cdf9dc4e..f8dd216b --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -1068,17 +1068,18 @@ int kgsl_gem_kmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; struct drm_kgsl_gem_object *priv; - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; mutex_lock(&dev->struct_mutex); priv = obj->driver_private; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) priv->memdesc.hostptr + offset; + i = offset >> PAGE_SHIFT; + page = sg_page(&(priv->memdesc.sg[i])); - page = vmalloc_to_page((void *) pg); if (!page) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c old mode 100644 new mode 100755 index 9e7ef61d..30018093 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -385,14 +385,16 @@ kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - writel_relaxed(val, &baseptr[pte]); + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + baseptr[pte] = val; } static inline uint32_t kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + return baseptr[pte] & GSL_PT_PAGE_ADDR_MASK; } static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 17c26c5f..66684868 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -301,13 +301,14 @@ static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; offset = (unsigned long) 
vmf->virtual_address - vma->vm_start; - pg = (unsigned long) memdesc->hostptr + offset; - page = vmalloc_to_page((void *) pg); + i = offset >> PAGE_SHIFT; + page = sg_page(&memdesc->sg[i]); if (page == NULL) return VM_FAULT_SIGBUS; @@ -324,8 +325,14 @@ static int kgsl_vmalloc_vmflags(struct kgsl_memdesc *memdesc) static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) { + int i = 0; + struct scatterlist *sg; kgsl_driver.stats.vmalloc -= memdesc->size; - vfree(memdesc->hostptr); + if (memdesc->hostptr) + vunmap(memdesc->hostptr); + if (memdesc->sg) + for_each_sg(memdesc->sg, sg, memdesc->sglen, i) + __free_page(sg_page(sg)); } static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) @@ -333,6 +340,39 @@ static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; } +/* + * kgsl_vmalloc_map_kernel - Map the memory in memdesc to kernel address space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_vmalloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int i; + /* create a list of pages to call vmap */ + pages = vmalloc(memdesc->sglen * sizeof(struct page *)); + if (!pages) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + memdesc->sglen * sizeof(struct page *)); + return -ENOMEM; + } + for_each_sg(memdesc->sg, sg, memdesc->sglen, i) + pages[i] = sg_page(sg); + memdesc->hostptr = vmap(pages, memdesc->sglen, + VM_IOREMAP, page_prot); + vfree(pages); + } + if (!memdesc->hostptr) + return -ENOMEM; + + return 0; +} + static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -376,6 +416,7 @@ struct kgsl_memdesc_ops kgsl_vmalloc_ops = { .free = kgsl_vmalloc_free, .vmflags = kgsl_vmalloc_vmflags, .vmfault = kgsl_vmalloc_vmfault, + .map_kernel_mem = kgsl_vmalloc_map_kernel, }; EXPORT_SYMBOL(kgsl_vmalloc_ops); @@ -413,7 +454,7 @@ EXPORT_SYMBOL(kgsl_cache_range_op); static int _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, - void *ptr, size_t size, unsigned int protflags) + size_t size, unsigned int protflags) { int order, ret = 0; int sglen = PAGE_ALIGN(size) / PAGE_SIZE; @@ -423,7 +464,6 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->pagetable = pagetable; memdesc->priv = KGSL_MEMFLAGS_CACHED; memdesc->ops = &kgsl_vmalloc_ops; - memdesc->hostptr = (void *) ptr; memdesc->sg = vmalloc(sglen * sizeof(struct scatterlist)); if (memdesc->sg == NULL) { @@ -436,19 +476,20 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); - for (i = 0; i < memdesc->sglen; i++, ptr += PAGE_SIZE) { - struct page *page = vmalloc_to_page(ptr); + for (i = 0; i < memdesc->sglen; i++) { + struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); if (!page) { - ret = -EINVAL; + ret = -ENOMEM; + memdesc->sglen = i; goto done; } + flush_dcache_page(page); sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); } outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, KGSL_CACHE_OP_FLUSH); - kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); - ret = kgsl_mmu_map(pagetable, memdesc, protflags); if (ret) @@ -473,20 +514,18 @@ int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { - void *ptr; - + 
int ret = 0; BUG_ON(size == 0); size = ALIGN(size, PAGE_SIZE * 2); - ptr = vmalloc(size); - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc(%d) failed\n", size); - return -ENOMEM; - } - - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + ret = _kgsl_sharedmem_vmalloc(memdesc, pagetable, size, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + if (!ret) + ret = kgsl_vmalloc_map_kernel(memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + return ret; } EXPORT_SYMBOL(kgsl_sharedmem_vmalloc); @@ -495,25 +534,15 @@ kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags) { - void *ptr; unsigned int protflags; BUG_ON(size == 0); - ptr = vmalloc_user(size); - - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc_user(%d) failed: allocated=%d\n", - size, kgsl_driver.stats.vmalloc); - return -ENOMEM; - } - - kmemleak_not_leak(ptr); protflags = GSL_PT_PAGE_RV; if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + return _kgsl_sharedmem_vmalloc(memdesc, pagetable, size, protflags); } EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); @@ -646,13 +675,17 @@ kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, uint32_t *dst, unsigned int offsetbytes) { + uint32_t *src; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); - WARN_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - if (offsetbytes + sizeof(unsigned int) > memdesc->size) + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) return -ERANGE; - - *dst = readl_relaxed(memdesc->hostptr + offsetbytes); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; return 0; } EXPORT_SYMBOL(kgsl_sharedmem_readl); @@ -662,12 +695,19 @@ kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, uint32_t src) { + uint32_t *dst; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); - BUG_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, - src, sizeof(uint)); - writel_relaxed(src, memdesc->hostptr + offsetbytes); + src, sizeof(uint32_t)); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; return 0; } EXPORT_SYMBOL(kgsl_sharedmem_writel); @@ -685,3 +725,33 @@ kgsl_sharedmem_set(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, return 0; } EXPORT_SYMBOL(kgsl_sharedmem_set); + +/* + * kgsl_sharedmem_map_vma - Map a user vma to physical memory + * + * @vma - The user vma to map + * @memdesc - The memory descriptor which contains information about the + * physical memory + * + * Return: 0 on success else error code + */ +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc) +{ + unsigned long addr = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + int ret, i = 0; + + if (!memdesc->sg || (size != memdesc->size) || + (memdesc->sglen != (size / PAGE_SIZE))) + return -EINVAL; + + for (; addr < vma->vm_end; addr += PAGE_SIZE, i++) { + ret = vm_insert_page(vma, addr, sg_page(&memdesc->sg[i])); + if (ret) + return ret; + } + return 0; +} 
+EXPORT_SYMBOL(kgsl_sharedmem_map_vma); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index 63e12e66..468f7388 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -34,13 +34,6 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 -struct kgsl_memdesc_ops { - int (*vmflags)(struct kgsl_memdesc *); - int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, - struct vm_fault *); - void (*free)(struct kgsl_memdesc *memdesc); -}; - extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, @@ -82,6 +75,10 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc); + static inline int memdesc_sg_phys(struct kgsl_memdesc *memdesc, unsigned int physaddr, unsigned int size) From a7bb935abbf810f9e169893ebc01f25d5071318a Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 23 Jun 2012 17:01:57 +0800 Subject: [PATCH 046/155] revert the pmem size to default configration --- arch/arm/mach-msm/board-htcleo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index b00b2cc7..1a4cae6b 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -40,10 +40,10 @@ #define MSM_FB_SIZE 0x00600000 #define MSM_PMEM_MDP_BASE 0x3B700000 -#define MSM_PMEM_MDP_SIZE 0x01000000 +#define MSM_PMEM_MDP_SIZE 0x02000000 -#define MSM_PMEM_ADSP_BASE 0x3C700000 -#define MSM_PMEM_ADSP_SIZE 0x01800000 +#define MSM_PMEM_ADSP_BASE 0x3D700000 +#define MSM_PMEM_ADSP_SIZE 0x02900000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 @@ -59,7 +59,7 @@ /* Begin EBI region */ #define PMEM_KERNEL_EBI1_SIZE 0x00028000 -#define MSM_PMEM_SF_SIZE 0x01400000 +#define MSM_PMEM_SF_SIZE 0x02000000 /* MSM_RAM_CONSOLE uses the last 0x00040000 of EBI memory, defined in msm_iomap.h #define MSM_RAM_CONSOLE_SIZE 0x00040000 From 5c1047c767043c95f73cd5be97cad1d6a0a9fe7f Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 23 Jun 2012 17:02:28 +0800 Subject: [PATCH 047/155] msm: kgsl: set the dma_address field of scatterlists Ion carveout and content protect heap buffers do not have a struct page associated with them. Thus sg_phys() will not work reliably on these buffers. Set the dma_address field on physically contiguous buffers. When mapping a scatterlist to the gpummu use sg_dma_address() first and if it returns 0 then use sg_phys(). msm: kgsl: Use kzalloc to allocate scatterlists of 1 page or less The majority of the scatterlist allocations used in KGSL are under 1 page (1 page of struct scatterlist is approximately 1024 entries equalling 4MB of allocated buffer). In these cases using vmalloc for the sglist is undesirable and slow. Add functions to check the size of the allocation and favor kzalloc for 1 page allocations and vmalloc for larger lists. 
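
A small sketch of the first point (the helper is illustrative rather than
part of the patch, and the address passed in is hypothetical): carveout
memory has no struct page behind it, so the entry is built from the
physical address directly and consumed via sg_dma_address() instead of
sg_phys().

    #include <linux/scatterlist.h>

    static void example_sg_from_carveout(struct scatterlist *sg,
                                         unsigned long physaddr, size_t size)
    {
            sg_init_table(sg, 1);
            sg->length = size;
            sg->offset = 0;
            /* no sg_set_page(): there is no struct page for carveout memory */
            sg->dma_address = physaddr;
    }

The kzalloc-versus-vmalloc threshold itself is implemented below in
kgsl_sg_alloc()/kgsl_sg_free(), which fall back to vmalloc only when the
scatterlist array would exceed one page.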
--- drivers/gpu/msm/kgsl.c | 5 ++-- drivers/gpu/msm/kgsl_gpummu.c | 2 +- drivers/gpu/msm/kgsl_sharedmem.c | 11 ++++---- drivers/gpu/msm/kgsl_sharedmem.h | 45 +++++++++++++++++++++++++++----- 4 files changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 283e29a7..e06a7c7e 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1349,7 +1349,8 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; unsigned long paddr = (unsigned long) addr; - memdesc->sg = vmalloc(sglen * sizeof(struct scatterlist)); + memdesc->sg = kgsl_sg_alloc(sglen); + if (memdesc->sg == NULL) return -ENOMEM; @@ -1389,7 +1390,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, err: spin_unlock(¤t->mm->page_table_lock); - vfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, sglen); memdesc->sg = NULL; return -EINVAL; diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c index 30018093..20f068e5 100755 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -685,7 +685,7 @@ kgsl_gpummu_map(void *mmu_specific_pt, flushtlb = 1; for_each_sg(memdesc->sg, s, memdesc->sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); unsigned int j; /* Each sg entry might be multiple pages long */ diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 66684868..3e490c93 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -154,7 +154,7 @@ static struct mem_entry_stats mem_stats[] = { #endif MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), #ifdef CONFIG_ION - MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, ion), + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), #endif }; @@ -286,7 +286,7 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) int i; for_each_sg(sg, s, sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); _outer_cache_range_op(op, paddr, s->length); } } @@ -465,7 +465,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->priv = KGSL_MEMFLAGS_CACHED; memdesc->ops = &kgsl_vmalloc_ops; - memdesc->sg = vmalloc(sglen * sizeof(struct scatterlist)); + memdesc->sg = kgsl_sg_alloc(sglen); + if (memdesc->sg == NULL) { ret = -ENOMEM; goto done; @@ -487,7 +488,7 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, flush_dcache_page(page); sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); } - outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, KGSL_CACHE_OP_FLUSH); ret = kgsl_mmu_map(pagetable, memdesc, protflags); @@ -593,7 +594,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - vfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, memdesc->sglen); memset(memdesc, 0, sizeof(*memdesc)); } diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index 468f7388..49694f90 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -75,25 +75,58 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +static inline unsigned int kgsl_get_sg_pa(struct scatterlist *sg) +{ + /* + * Try sg_dma_address first to support ion carveout + * regions which do not work with sg_phys(). 
+ */ + unsigned int pa = sg_dma_address(sg); + if (pa == 0) + pa = sg_phys(sg); + return pa; +} + int kgsl_sharedmem_map_vma(struct vm_area_struct *vma, const struct kgsl_memdesc *memdesc); +/* + * For relatively small sglists, it is preferable to use kzalloc + * rather than going down the vmalloc rat hole. If the size of + * the sglist is < PAGE_SIZE use kzalloc otherwise fallback to + * vmalloc + */ + +static inline void *kgsl_sg_alloc(unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + else + return vmalloc(sglen * sizeof(struct scatterlist)); +} + +static inline void kgsl_sg_free(void *ptr, unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); +} + static inline int memdesc_sg_phys(struct kgsl_memdesc *memdesc, unsigned int physaddr, unsigned int size) { - struct page *page = phys_to_page(physaddr); - - memdesc->sg = vmalloc(sizeof(struct scatterlist) * 1); - if (memdesc->sg == NULL) - return -ENOMEM; + memdesc->sg = kgsl_sg_alloc(1); kmemleak_not_leak(memdesc->sg); memdesc->sglen = 1; sg_init_table(memdesc->sg, 1); - sg_set_page(&memdesc->sg[0], page, size, 0); + memdesc->sg[0].length = size; + memdesc->sg[0].offset = 0; + memdesc->sg[0].dma_address = physaddr; return 0; } From f6acf3ab9f87b346cf1c6e3de3a470c0ec606081 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 23 Jun 2012 17:48:20 +0800 Subject: [PATCH 048/155] msm: kgsl: queue timestamp expired work more often There are a some workloads where interrupts do not always get generated, and as a result the timestamp work was not triggered often enough. Queue timestamp expired work from adreno_waittimestamp(), when the timestamp expires while we are not waiting. It is possible in this case that no interrupt fired because no processes were waiting. Queue timestamp expired work when freememontimestamp is called, which reduces the amount of memory built up by applications that use this api often. --- drivers/gpu/msm/adreno.c | 31 +++++++++++++++---------------- drivers/gpu/msm/kgsl.c | 3 ++- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 739f2276..e9ddb5e2 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -1182,13 +1182,21 @@ static int adreno_waittimestamp(struct kgsl_device *device, msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; msecs_part = (msecs - msecs_first + 3) / 4; for (retries = 0; retries < 5; retries++) { - if (!kgsl_check_timestamp(device, timestamp)) { + if (kgsl_check_timestamp(device, timestamp)) { + /* if the timestamp happens while we're not + * waiting, there's a chance that an interrupt + * will not be generated and thus the timestamp + * work needs to be queued. + */ + queue_work(device->work_queue, &device->ts_expired_ws); + status = 0; + goto done; + } adreno_poke(device); // the QSD8X50 don't support io_fraction ?? // SecureCRT 2012-06-20 // io_cnt = (io_cnt + 1) % 100; // if (io_cnt < -// pwr->pwrlevels[pwr->active_pwrlevel]. 
-// io_fraction) +// pwr->pwrlevels[pwr->active_pwrlevel].o_fraction) // io = 0; mutex_unlock(&device->mutex); /* We need to make sure that the process is @@ -1212,28 +1220,19 @@ static int adreno_waittimestamp(struct kgsl_device *device, } /*this wait timed out*/ } - } - if (!kgsl_check_timestamp(device, timestamp)) { status = -ETIMEDOUT; KGSL_DRV_ERR(device, - "Device hang detected while waiting " - "for timestamp: %x, last " - "submitted(rb->timestamp): %x, wptr: " - "%x\n", timestamp, - adreno_dev->ringbuffer.timestamp, + "Device hang detected while waiting for timestamp: %x," + "last submitted(rb->timestamp): %x, wptr: %x\n", + timestamp, adreno_dev->ringbuffer.timestamp, adreno_dev->ringbuffer.wptr); if (!adreno_dump_and_recover(device)) { /* wait for idle after recovery as the * timestamp that this process wanted * to wait on may be invalid */ - if (!adreno_idle(device, - KGSL_TIMEOUT_DEFAULT)) - status = 0; - } - } else { + if (!adreno_idle(device, KGSL_TIMEOUT_DEFAULT)) status = 0; } - done: return (int)status; } diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index e06a7c7e..6f1120de 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -99,7 +99,8 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (n == &device->events) list_add_tail(&event->list, &device->events); - + + queue_work(device->work_queue, &device->ts_expired_ws); return 0; } #endif From 4520a7c383cb0827649d1a90f4c68b7403661ccc Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 23 Jun 2012 18:52:06 +0800 Subject: [PATCH 049/155] msm: kgsl: cancel events from kgsl_release Events need to be cancelled when an fd is released, to avoid possible memory leaks or use after free. When the event is cancelled, its callback is called. Currently this is sufficient since events are used for resource management and we have no option but to release the lock or memory. If future uses need to distinguish between the callback firing and a cancel, they can look at the timestamp passed to the callback, which will be before the timestamp they expected. Otherwise a separate cancel callback can be added. --- drivers/gpu/msm/kgsl.c | 51 ++++++++++++++++++++++++++++++----- drivers/gpu/msm/kgsl_device.h | 1 + 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 6f1120de..1abcc585 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -55,12 +55,14 @@ static struct ion_client *kgsl_ion_client; * @ts - the timestamp to trigger the event on * @cb - callback function to call when the timestamp expires * @priv - private data for the specific event type + * @owner - driver instance that owns this event * * @returns - 0 on success or error code on failure */ static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv) + void (*cb)(struct kgsl_device *, void *, u32), void *priv, + struct kgsl_device_private *owner) { struct kgsl_event *event; struct list_head *n; @@ -84,6 +86,7 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, event->timestamp = ts; event->priv = priv; event->func = cb; + event->owner = owner; /* Add the event in order to the list */ @@ -105,6 +108,36 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, } #endif +/** + * kgsl_cancel_events - Cancel all events for a process + * @device - KGSL device for the events to cancel + * @owner - driver instance that owns the events to cancel + * + */ +static void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_device_private *owner) +{ + struct kgsl_event *event, *event_tmp; + unsigned int cur = device->ftbl->readtimestamp(device, + KGSL_TIMESTAMP_RETIRED); + + list_for_each_entry_safe(event, event_tmp, &device->events, list) { + if (event->owner != owner) + continue; + /* + * "cancel" the events by calling their callback. + * Currently, events are used for lock and memory + * management, so if the process is dying the right + * thing to do is release or free. + */ + if (event->func) + event->func(device, event->priv, cur); + + list_del(&event->list); + kfree(event); + } +} + static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) { @@ -656,6 +689,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) * process and this device */ kgsl_memqueue_cleanup(device, private); + kgsl_cancel_events(device, dev_priv); mutex_unlock(&device->mutex); kfree(dev_priv); @@ -1796,6 +1830,7 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, * @timestamp - Timestamp to trigger the event * @data - User space buffer containing struct kgsl_genlock_event_priv * @len - length of the userspace buffer + * @owner - driver instance that owns this event * @returns 0 on success or error code on error * * Attack to a genlock handle and register an event to release the @@ -1803,7 +1838,8 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, */ static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { struct kgsl_genlock_event_priv *event; struct kgsl_timestamp_event_genlock priv; @@ -1828,7 +1864,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, return ret; } - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event, + owner); if (ret) kfree(event); @@ -1836,7 +1873,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, } #else static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { return -EINVAL; } @@ -1859,7 +1897,8 @@ static 
long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, switch (param->type) { case KGSL_TIMESTAMP_EVENT_GENLOCK: ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len); + param->timestamp, param->priv, param->len, + dev_priv); break; default: ret = -EINVAL; diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index b7524d87..998c16f7 100755 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -124,6 +124,7 @@ struct kgsl_event { void (*func)(struct kgsl_device *, void *, u32); void *priv; struct list_head list; + struct kgsl_device_private *owner; }; From b8450f4096eaad1f1d6934cff7af2509d3b63a11 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 23 Jun 2012 19:03:55 +0800 Subject: [PATCH 050/155] msm: kgsl: change timestamp frees to use kgsl_event The timestamp memqueue was unsorted, which could cause memory to not be freed soon enough. The kgsl_event list is sorted and does almost exactly the same thing as the memqueue did, so freememontimestamp is now implemented using the kgsl_event list. --- drivers/gpu/msm/adreno.c | 9 ----- drivers/gpu/msm/kgsl.c | 64 +++++++---------------------------- drivers/gpu/msm/kgsl_device.h | 1 - 3 files changed, 12 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index e9ddb5e2..c536fb89 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -1000,15 +1000,6 @@ struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, } mutex_unlock(&kgsl_driver.process_mutex); - BUG_ON(!mutex_is_locked(&device->mutex)); - list_for_each_entry(entry, &device->memqueue, list) { - if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { - result = &entry->memdesc; - return result; - } - - } - while (1) { struct adreno_context *adreno_context = NULL; context = idr_get_next(&device->context_idr, &next); diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 1abcc585..65efe2f4 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -47,8 +47,6 @@ MODULE_PARM_DESC(ksgl_mmu_type, static struct ion_client *kgsl_ion_client; -#ifdef CONFIG_GENLOCK - /** * kgsl_add_event - Add a new timstamp event for the KGSL device * @device - KGSL device for the new event @@ -106,7 +104,6 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, queue_work(device->work_queue, &device->ts_expired_ws); return 0; } -#endif /** * kgsl_cancel_events - Cancel all events for a process @@ -258,44 +255,10 @@ kgsl_destroy_context(struct kgsl_device_private *dev_priv, idr_remove(&dev_priv->device->context_idr, id); } -/* to be called when a process is destroyed, this walks the memqueue and - * frees any entryies that belong to the dying process - */ -static void kgsl_memqueue_cleanup(struct kgsl_device *device, - struct kgsl_process_private *private) -{ - struct kgsl_mem_entry *entry, *entry_tmp; - - if (!private) - return; - - BUG_ON(!mutex_is_locked(&device->mutex)); - - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (entry->priv == private) { - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - } -} - -static void kgsl_memqueue_freememontimestamp(struct kgsl_device *device, - struct kgsl_mem_entry *entry, - uint32_t timestamp, - enum kgsl_timestamp_type type) -{ - BUG_ON(!mutex_is_locked(&device->mutex)); - - entry->free_timestamp = timestamp; - - list_add_tail(&entry->list, &device->memqueue); -} - static void kgsl_timestamp_expired(struct work_struct *work) { struct 
kgsl_device *device = container_of(work, struct kgsl_device, ts_expired_ws); - struct kgsl_mem_entry *entry, *entry_tmp; struct kgsl_event *event, *event_tmp; uint32_t ts_processed; @@ -305,15 +268,6 @@ static void kgsl_timestamp_expired(struct work_struct *work) ts_processed = device->ftbl->readtimestamp(device, KGSL_TIMESTAMP_RETIRED); - /* Flush the freememontimestamp queue */ - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (timestamp_cmp(ts_processed, entry->free_timestamp) < 0) - break; - - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - /* Process expired events */ list_for_each_entry_safe(event, event_tmp, &device->events, list) { if (timestamp_cmp(ts_processed, event->timestamp) < 0) @@ -688,7 +642,6 @@ static int kgsl_release(struct inode *inodep, struct file *filep) /* clean up any to-be-freed entries that belong to this * process and this device */ - kgsl_memqueue_cleanup(device, private); kgsl_cancel_events(device, dev_priv); mutex_unlock(&device->mutex); @@ -1062,6 +1015,16 @@ static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private return 0; } +static void kgsl_freemem_event_cb(struct kgsl_device *device, + void *priv, u32 timestamp) +{ + struct kgsl_mem_entry *entry = priv; + spin_lock(&entry->priv->mem_lock); + list_del(&entry->list); + spin_unlock(&entry->priv->mem_lock); + kgsl_mem_entry_put(entry); +} + static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -1072,13 +1035,11 @@ static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private spin_lock(&dev_priv->process_priv->mem_lock); entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr); - if (entry) - list_del(&entry->list); spin_unlock(&dev_priv->process_priv->mem_lock); if (entry) { - kgsl_memqueue_freememontimestamp(dev_priv->device, entry, - param->timestamp, param->type); + result = kgsl_add_event(dev_priv->device, param->timestamp, + kgsl_freemem_event_cb, entry, dev_priv); } else { KGSL_DRV_ERR(dev_priv->device, "invalid gpuaddr %08x\n", param->gpuaddr); @@ -2253,7 +2214,6 @@ kgsl_register_device(struct kgsl_device *device) INIT_WORK(&device->idle_check_ws, kgsl_idle_check); INIT_WORK(&device->ts_expired_ws, kgsl_timestamp_expired); - INIT_LIST_HEAD(&device->memqueue); INIT_LIST_HEAD(&device->events); ret = kgsl_mmu_init(device); diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index 998c16f7..ff78ae3f 100755 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -153,7 +153,6 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; - struct list_head memqueue; unsigned int active_cnt; struct completion suspend_gate; From 664e1188572ec04c3d5367d12210f4541af68a6c Mon Sep 17 00:00:00 2001 From: Marc Alexander Date: Fri, 11 May 2012 12:45:55 +0200 Subject: [PATCH 051/155] Allow high current charging on china chargers --- drivers/power/ds2746_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c index 931191b4..5f071a39 100644 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -280,7 +280,7 @@ static BOOL is_charging_avaiable(void) static BOOL is_high_current_charging_avaialable(void) { if (!poweralg.protect_flags.is_charging_high_current_avaialble) return FALSE; - if (!poweralg.is_china_ac_in) return FALSE; + //if (!poweralg.is_china_ac_in) return FALSE; /* allow high current charging on china 
chargers */ if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) return FALSE; return TRUE; } From d4595be82672b3a0f5453d5a04cba7df20e3b672 Mon Sep 17 00:00:00 2001 From: tytung Date: Fri, 29 Jun 2012 02:04:58 +0800 Subject: [PATCH 052/155] net: bluetooth: Hold wakelock until BT idle timer kicks in. Source: https://github.com/dorimanx/Dorimanx-HD2-2.6.32.X/commit/7e394e255119ad31a986e2d3f7cef3f134e10a79 --- include/net/bluetooth/hci_core.h | 7 +++++-- net/bluetooth/hci_conn.c | 30 ++++++++++++++++++++++++++++-- net/bluetooth/hci_event.c | 6 ++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2ac98852..748f853b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,7 +26,7 @@ #define __HCI_CORE_H #include - +#include /* HCI upper protocols */ #define HCI_PROTO_L2CAP 0 #define HCI_PROTO_SCO 1 @@ -183,10 +183,11 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; + struct timer_list auto_accept_timer; struct work_struct work_add; struct work_struct work_del; - + struct wake_lock idle_lock; struct device dev; atomic_t devref; @@ -246,6 +247,7 @@ enum { HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, + HCI_CONN_SCO_SETUP_PEND, }; static inline void hci_conn_hash_init(struct hci_dev *hdev) @@ -326,6 +328,7 @@ void hci_acl_connect(struct hci_conn *conn); void hci_acl_disconn(struct hci_conn *conn, __u8 reason); void hci_add_sco(struct hci_conn *conn, __u16 handle); void hci_setup_sync(struct hci_conn *conn, __u16 handle); +void hci_sco_setup(struct hci_conn *conn, __u8 status); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, __u16 pkt_type, bdaddr_t *dst); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2f4d30fd..c22bc17c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +/* Device _must_ be locked */ +void hci_sco_setup(struct hci_conn *conn, __u8 status) +{ + struct hci_conn *sco = conn->link; + + BT_DBG("%p", conn); + + if (!sco) + return; + + if (!status) { + if (lmp_esco_capable(conn->hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } +} + static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -216,6 +237,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; + wake_lock_init(&conn->idle_lock, WAKE_LOCK_SUSPEND, "bt_idle"); switch (type) { case ACL_LINK: @@ -271,9 +293,11 @@ int hci_conn_del(struct hci_conn *conn) BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); + /* Make sure no timers are running */ del_timer(&conn->idle_timer); - + wake_lock_destroy(&conn->idle_lock); del_timer(&conn->disc_timer); + del_timer(&conn->auto_accept_timer); if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; @@ -521,9 +545,11 @@ void hci_conn_enter_active_mode(struct hci_conn *conn) } timer: - if (hdev->idle_timeout > 0) + if (hdev->idle_timeout > 0) { mod_timer(&conn->idle_timer, jiffies + msecs_to_jiffies(hdev->idle_timeout)); + wake_lock(&conn->idle_lock); + } } /* Enter sniff mode */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8c59afcc..6b0b59c4 100644 --- 
a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1482,6 +1482,12 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb else conn->power_save = 0; } + if (conn->mode == HCI_CM_SNIFF) + if (wake_lock_active(&conn->idle_lock)) + wake_unlock(&conn->idle_lock); + + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, ev->status); } hci_dev_unlock(hdev); From 544a54b32b27ed5c8b0c70eeaa1c07d56292b124 Mon Sep 17 00:00:00 2001 From: securecrt Date: Mon, 23 Jul 2012 14:13:02 +0800 Subject: [PATCH 053/155] ignore the version check --- arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) mode change 100644 => 100755 arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c diff --git a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c old mode 100644 new mode 100755 index 69ce1804..ee3283c5 --- a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c +++ b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c @@ -61,7 +61,7 @@ #define VDEC_GET_MAJOR_VERSION(version) (((version)&MAJOR_MASK)>>16) #define VDEC_GET_MINOR_VERSION(version) ((version)&MINOR_MASK) - +#define DEBUG_TRACE_VDEC #ifdef DEBUG_TRACE_VDEC #define TRACE(fmt,x...) \ do { pr_debug("%s:%d " fmt, __func__, __LINE__, ##x); } while (0) @@ -69,6 +69,8 @@ #define TRACE(fmt,x...) do { } while (0) #endif +/* the version check will cause vdec hang up!!! */ +#define VERSION_CHECK 0 static DEFINE_MUTEX(idlecount_lock); static int idlecount; @@ -696,7 +698,7 @@ static long vdec_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; default: - pr_err("%s: invalid ioctl!\n", __func__); + pr_err("%s: invalid ioctl! cmd= %08x \n", __func__,cmd); ret = -EINVAL; break; } @@ -799,8 +801,9 @@ static int vdec_open(struct inode *inode, struct file *file) int i; struct vdec_msg_list *l; struct vdec_data *vd; +#if VERSION_CHECK struct dal_info version_info; - +#endif pr_info("q6vdec_open()\n"); mutex_lock(&vdec_ref_lock); if (ref_cnt >= MAX_SUPPORTED_INSTANCES) { @@ -845,6 +848,7 @@ static int vdec_open(struct inode *inode, struct file *file) ret = -EIO; goto vdec_open_err_handle_list; } +#if VERSION_CHECK ret = dal_call_f9(vd->vdec_handle, DAL_OP_INFO, &version_info, sizeof(struct dal_info)); @@ -859,12 +863,15 @@ static int vdec_open(struct inode *inode, struct file *file) pr_err("%s: driver version mismatch !\n", __func__); goto vdec_open_err_handle_version; } - +#endif vd->running = 1; prevent_sleep(); return 0; + +#if VERSION_CHECK vdec_open_err_handle_version: dal_detach(vd->vdec_handle); +#endif vdec_open_err_handle_list: { struct vdec_msg_list *l, *n; From 148ebef127c31fd730ad6e8af71c8d4d8b60c4af Mon Sep 17 00:00:00 2001 From: securecrt Date: Mon, 23 Jul 2012 14:37:40 +0800 Subject: [PATCH 054/155] reverse DEBUG_TRACE_VDEC --- arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c index ee3283c5..b6162d89 100755 --- a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c +++ b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c @@ -61,7 +61,7 @@ #define VDEC_GET_MAJOR_VERSION(version) (((version)&MAJOR_MASK)>>16) #define VDEC_GET_MINOR_VERSION(version) ((version)&MINOR_MASK) -#define DEBUG_TRACE_VDEC +//#define DEBUG_TRACE_VDEC #ifdef DEBUG_TRACE_VDEC #define TRACE(fmt,x...) 
\ do { pr_debug("%s:%d " fmt, __func__, __LINE__, ##x); } while (0) From ee339b2bcb751c02f64afc6d451aa5b5e248c0ab Mon Sep 17 00:00:00 2001 From: securecrt Date: Mon, 23 Jul 2012 18:59:50 +0800 Subject: [PATCH 055/155] msm: kgsl: Write the retired timestamp on resume Write the retired timestamp into the expected location. This fixes userspace crashes after resume when the retired timestamp is read as 0 instead of the expected last timestamp. --- drivers/gpu/msm/adreno_ringbuffer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index e2580171..57af5563 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -309,6 +309,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) adreno_regwrite(device, REG_SCRATCH_UMSK, GSL_RB_MEMPTRS_SCRATCH_MASK); + /* update the eoptimestamp field with the last retired timestamp */ + kgsl_sharedmem_writel(&device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), + rb->timestamp); + /* load the CP ucode */ status = adreno_ringbuffer_load_pm4_ucode(device); From be4c38e2f5c165dffdaf76a1a573bf3ad1faff5f Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 24 Jul 2012 23:30:19 +0800 Subject: [PATCH 056/155] msm: kgsl: handle larger instruction store for adreno225 This GPU has a larger instruction store, so more memory needs to be reserved for saving shader state when context switching. The initial vertex and pixel partitioning of the instruction store also needs to be different. --- drivers/gpu/msm/adreno.c | 19 +++++++++++----- drivers/gpu/msm/adreno.h | 31 +++++++++++++++++++++++++ drivers/gpu/msm/adreno_a2xx.c | 35 +++++++++++++++++------------ drivers/gpu/msm/adreno_debugfs.c | 11 +++++---- drivers/gpu/msm/adreno_ringbuffer.c | 10 +++++---- 5 files changed, 79 insertions(+), 27 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/adreno_debugfs.c diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index c536fb89..ad164fee 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -137,21 +137,28 @@ static const struct { const char *pm4fw; const char *pfpfw; struct adreno_gpudev *gpudev; + unsigned int istore_size; + unsigned int pix_shader_start; } adreno_gpulist[] = { { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384}, { ADRENO_REV_A205, 0, 1, 0, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384}, { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, - "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev }, + "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev, + 512, 384}, /* * patchlevel 5 (8960v2) needs special pm4 firmware to work around * a hardware problem. 
*/ { ADRENO_REV_A225, 2, 2, 0, 5, - "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768 }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, - "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768 }, }; static void adreno_gmeminit(struct adreno_device *adreno_dev) @@ -424,6 +431,8 @@ adreno_identify_gpu(struct adreno_device *adreno_dev) adreno_dev->gpudev = adreno_gpulist[i].gpudev; adreno_dev->pfp_fwfile = adreno_gpulist[i].pfpfw; adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw; + adreno_dev->istore_size = adreno_gpulist[i].istore_size; + adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start; } static int __devinit diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 088511af..b54699ad 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -39,6 +39,12 @@ #define ADRENO_DEFAULT_PWRSCALE_POLICY NULL #endif +/* + * constants for the size of shader instructions + */ +#define ADRENO_ISTORE_BYTES 12 +#define ADRENO_ISTORE_WORDS 3 + enum adreno_gpurev { ADRENO_REV_UNKNOWN = 0, ADRENO_REV_A200 = 200, @@ -65,6 +71,8 @@ struct adreno_device { unsigned int mharb; struct adreno_gpudev *gpudev; unsigned int wait_timeout; + unsigned int istore_size; + unsigned int pix_shader_start; }; struct adreno_gpudev { @@ -128,5 +136,28 @@ static inline int adreno_is_a2xx(struct adreno_device *adreno_dev) return (adreno_dev->gpurev <= ADRENO_REV_A225); } +/** + * adreno_encode_istore_size - encode istore size in CP format + * @adreno_dev - The 3D device. + * + * Encode the istore size into the format expected that the + * CP_SET_SHADER_BASES and CP_ME_INIT commands: + * bits 31:29 - istore size as encoded by this function + * bits 27:16 - vertex shader start offset in instructions + * bits 11:0 - pixel shader start offset in instructions. + */ +static inline int adreno_encode_istore_size(struct adreno_device *adreno_dev) +{ + unsigned int size; + /* in a225 the CP microcode multiplies the encoded + * value by 3 while decoding. 
+ */ + if (adreno_is_a225(adreno_dev)) + size = adreno_dev->istore_size/3; + else + size = adreno_dev->istore_size; + + return (ilog2(size) - 5) << 29; +} #endif /*__ADRENO_H */ diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index 9e167c65..499d0b4f 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -72,10 +72,6 @@ #define TEX_CONSTANTS (32*6) /* DWORDS */ #define BOOL_CONSTANTS 8 /* DWORDS */ #define LOOP_CONSTANTS 56 /* DWORDS */ -#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ - -/* 96-bit instructions */ -#define SHADER_INSTRUCT (1<istore_size*ADRENO_ISTORE_BYTES; +} + +static inline int _context_size(struct adreno_device *adreno_dev) +{ + return SHADER_OFFSET + 3*_shader_shadow_size(adreno_dev); +} /* A scratchpad used to build commands during context create */ @@ -603,7 +606,8 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00003F00; *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* load the patched vertex shader stream */ cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); @@ -806,7 +810,8 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* Load the patched fragment shader stream */ cmds = @@ -1104,8 +1109,10 @@ build_shader_save_restore_cmds(struct adreno_device *adreno_dev, /* compute vertex, pixel and shared instruction shadow GPU addresses */ tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; - tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; - tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + + _shader_shadow_size(adreno_dev); + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + + _shader_shadow_size(adreno_dev); /* restore shader partitioning and instructions */ @@ -1161,8 +1168,8 @@ build_shader_save_restore_cmds(struct adreno_device *adreno_dev, *cmd++ = REG_SCRATCH_REG2; /* AND off invalid bits. */ *cmd++ = 0x0FFF0FFF; - /* OR in instruction memory size */ - *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + /* OR in instruction memory size. 
*/ + *cmd++ = adreno_encode_istore_size(adreno_dev); /* write the computed value to the SET_SHADER_BASES data field */ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); @@ -1305,13 +1312,13 @@ static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, */ ret = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); + drawctxt->pagetable, _context_size(adreno_dev)); if (ret) return ret; kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, - CONTEXT_SIZE); + _context_size(adreno_dev)); tmp_ctx.cmd = tmp_ctx.start = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c old mode 100644 new mode 100755 index c878a2c2..419ce9d2 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -223,21 +223,23 @@ static int kgsl_regread_nolock(struct kgsl_device *device, return 0; } -#define KGSL_ISTORE_START 0x5000 -#define KGSL_ISTORE_LENGTH 0x600 +#define ADRENO_ISTORE_START 0x5000 static ssize_t kgsl_istore_read( struct file *file, char __user *buff, size_t buff_count, loff_t *ppos) { - int i, count = KGSL_ISTORE_LENGTH, remaining, pos = 0, tot = 0; + int i, count, remaining, pos = 0, tot = 0; struct kgsl_device *device = file->private_data; const int rowc = 8; + struct adreno_device *adreno_dev; if (!ppos || !device) return 0; + adreno_dev = ADRENO_DEVICE(device); + count = adreno_dev->istore_size * ADRENO_ISTORE_WORDS; remaining = count; for (i = 0; i < count; i += rowc) { unsigned int vals[rowc]; @@ -248,7 +250,8 @@ static ssize_t kgsl_istore_read( if (pos >= *ppos) { for (j = 0; j < linec; ++j) kgsl_regread_nolock(device, - KGSL_ISTORE_START+i+j, vals+j); + ADRENO_ISTORE_START + i + j, + vals + j); } else memset(vals, 0, sizeof(vals)); diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 57af5563..e5c28d71 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -246,6 +246,7 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) union reg_cp_rb_cntl cp_rb_cntl; unsigned int *cmds, rb_cntl; struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); uint cmds_gpu; if (rb->flags & KGSL_FLAGS_STARTED) @@ -362,9 +363,10 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) GSL_RB_WRITE(cmds, cmds_gpu, SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); - /* Vertex and Pixel Shader Start Addresses in instructions - * (3 DWORDS per instruction) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x80000180); + /* Instruction memory size: */ + GSL_RB_WRITE(cmds, cmds_gpu, + (adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start)); /* Maximum Contexts */ GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); /* Write Confirm Interval and The CP will wait the From 08851495121f74be24fda98efc4873e5864f219b Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Wed, 25 Jul 2012 00:10:26 +0800 Subject: [PATCH 057/155] msm: kgsl: Add support for the A3XX family of GPUs Add support for the A320, the first of the new generation of Adreno GPUs. 
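The A320 is wired up through the same tables the existing cores already use:
a new adreno_gpulist row selects the a300 firmware files and the
adreno_a3xx_gpudev ops structure, and the per-GPU hooks introduced here
(reg_rbbm_status, rb_init, start, busy_cycles) keep the common code free of
register-layout and core-specific assumptions. Below is only a rough,
standalone C sketch of how an ANY_ID wildcard lookup over such a list
behaves (names and fields are illustrative, not the driver's exact code);
more specific rows, such as the A225 patchlevel-5 workaround entry, have to
appear before the generic ones:

    /* Hypothetical sketch of wildcard GPU-list matching; not driver code. */
    #include <stdio.h>

    #define ANY_ID (~0U)

    struct gpu_entry {
            unsigned int core, major, minor, patchid;
            const char *pm4fw;
    };

    static const struct gpu_entry gpulist[] = {
            { 2, 2, 0, 5,           "a225p5_pm4.fw" }, /* 8960v2 workaround */
            { 2, 2, ANY_ID, ANY_ID, "a225_pm4.fw"   }, /* generic A225      */
            { 3, 1, ANY_ID, ANY_ID, "a300_pm4.fw"   }, /* new A320 row      */
    };

    static const struct gpu_entry *identify(unsigned int core,
            unsigned int major, unsigned int minor, unsigned int patchid)
    {
            unsigned int i;

            for (i = 0; i < sizeof(gpulist) / sizeof(gpulist[0]); i++) {
                    const struct gpu_entry *e = &gpulist[i];

                    /* ANY_ID matches every value for that field. */
                    if ((e->core == ANY_ID || core == e->core) &&
                        (e->major == ANY_ID || major == e->major) &&
                        (e->minor == ANY_ID || minor == e->minor) &&
                        (e->patchid == ANY_ID || patchid == e->patchid))
                            return e;
            }
            return NULL;
    }

    int main(void)
    {
            /* An A320 (core 3, major 1, any minor/patch) hits the a300 row. */
            const struct gpu_entry *e = identify(3, 1, 0, 0);

            printf("%s\n", e ? e->pm4fw : "unknown GPU");
            return 0;
    }

The sketch prints "a300_pm4.fw", mirroring how an A320 chip id is expected
to resolve to the new gpulist entry and its adreno_a3xx_gpudev hooks.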
--- drivers/gpu/msm/Makefile | 1 + drivers/gpu/msm/a3xx_reg.h | 453 +++++ drivers/gpu/msm/adreno.c | 146 +- drivers/gpu/msm/adreno.h | 21 +- drivers/gpu/msm/adreno_a2xx.c | 201 ++- drivers/gpu/msm/adreno_a3xx.c | 2547 +++++++++++++++++++++++++++ drivers/gpu/msm/adreno_drawctxt.c | 24 +- drivers/gpu/msm/adreno_drawctxt.h | 41 +- drivers/gpu/msm/adreno_pm4types.h | 14 +- drivers/gpu/msm/adreno_ringbuffer.c | 152 +- drivers/gpu/msm/adreno_ringbuffer.h | 15 +- 11 files changed, 3398 insertions(+), 217 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/Makefile create mode 100755 drivers/gpu/msm/a3xx_reg.h create mode 100755 drivers/gpu/msm/adreno_a3xx.c mode change 100644 => 100755 drivers/gpu/msm/adreno_pm4types.h diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile old mode 100644 new mode 100755 index f49e7164..8ef724e2 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -19,6 +19,7 @@ msm_adreno-y += \ adreno_drawctxt.o \ adreno_postmortem.o \ adreno_a2xx.o \ + adreno_a3xx.o \ adreno.o msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h new file mode 100755 index 00000000..84c83b8c --- /dev/null +++ b/drivers/gpu/msm/a3xx_reg.h @@ -0,0 +1,453 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3xx_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* Register definitions */ + +#define A3XX_RBBM_HW_VERSION 0x000 +#define A3XX_RBBM_HW_RELEASE 0x001 +#define A3XX_RBBM_HW_CONFIGURATION 0x002 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_AHB_FAULT 0x54D +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_PROTECT_REG_1 0x461 +#define A3XX_CP_PROTECT_REG_2 0x462 +#define A3XX_CP_PROTECT_REG_3 0x463 +#define A3XX_CP_PROTECT_REG_4 0x464 +#define A3XX_CP_PROTECT_REG_5 0x465 +#define A3XX_CP_PROTECT_REG_6 0x466 +#define A3XX_CP_PROTECT_REG_7 0x467 +#define A3XX_CP_PROTECT_REG_8 0x468 +#define A3XX_CP_PROTECT_REG_9 0x469 +#define A3XX_CP_PROTECT_REG_A 0x46A +#define A3XX_CP_PROTECT_REG_B 0x46B +#define A3XX_CP_PROTECT_REG_C 0x46C +#define A3XX_CP_PROTECT_REG_D 0x46D +#define A3XX_CP_PROTECT_REG_E 0x46E +#define A3XX_CP_PROTECT_REG_F 0x46F +#define A3XX_CP_SCRATCH_REG2 0x57A +#define A3XX_CP_SCRATCH_REG3 0x57B +#define A3XX_VSC_BIN_SIZE 0xC01 +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_CONFIG_0 0xC06 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_CONFIG_1 0xC09 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_CONFIG_2 0xC0C +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_CONFIG_3 0xC0F +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_CONFIG_4 0xC12 +#define 
A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_CONFIG_5 0xC15 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_CONFIG_6 0xC18 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_CONFIG_7 0xC1B +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_PC_VSTREAM_CONTROL 0x21E4 +#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA +#define A3XX_PC_PRIM_VTX_CNTL 0x21EC +#define A3XX_PC_RESTART_INDEX 0x21ED +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_CONTROL_0 0x2240 +#define A3XX_VFD_INDEX_MIN 0x2242 +#define A3XX_VFD_FETCH_INSTR_0_0 0x2246 +#define A3XX_VFD_FETCH_INSTR_0_4 0x224E +#define A3XX_VFD_DECODE_INSTR_0 0x2266 
+#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E +#define A3XX_VPC_ATTR 0x2280 +#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340 +#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342 +#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343 +#define A3XX_VBIF_FIXED_SORT_EN 0x300C +#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D +#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E + +/* Bit flags for RBBM_CTL */ +#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1) +#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (17 << 1) + +/* Various flags used by the context switch code */ + +#define SP_MULTI 0 +#define SP_BUFFER_MODE 1 +#define SP_TWO_VTX_QUADS 0 +#define SP_PIXEL_BASED 0 +#define SP_R8G8B8A8_UNORM 8 +#define SP_FOUR_PIX_QUADS 1 + +#define HLSQ_DIRECT 0 +#define HLSQ_BLOCK_ID_SP_VS 4 +#define HLSQ_SP_VS_INSTR 0 +#define HLSQ_SP_FS_INSTR 0 +#define HLSQ_BLOCK_ID_SP_FS 6 +#define HLSQ_TWO_PIX_QUADS 0 +#define HLSQ_TWO_VTX_QUADS 0 +#define HLSQ_BLOCK_ID_TP_TEX 2 +#define HLSQ_TP_TEX_SAMPLERS 0 +#define HLSQ_TP_TEX_MEMOBJ 1 +#define HLSQ_BLOCK_ID_TP_MIPMAP 3 +#define HLSQ_TP_MIPMAP_BASE 1 +#define HLSQ_FOUR_PIX_QUADS 1 + +#define RB_FACTOR_ONE 1 +#define RB_BLEND_OP_ADD 0 +#define RB_FACTOR_ZERO 0 +#define RB_DITHER_DISABLE 0 +#define RB_DITHER_ALWAYS 1 +#define RB_FRAG_NEVER 0 +#define RB_ENDIAN_NONE 0 +#define RB_R8G8B8A8_UNORM 8 +#define RB_RESOLVE_PASS 2 +#define RB_CLEAR_MODE_RESOLVE 1 +#define RB_TILINGMODE_LINEAR 0 +#define RB_REF_NEVER 0 +#define RB_STENCIL_KEEP 0 +#define RB_RENDERING_PASS 0 +#define RB_TILINGMODE_32X32 2 + +#define PC_DRAW_TRIANGLES 2 +#define PC_DI_PT_RECTLIST 8 +#define PC_DI_SRC_SEL_AUTO_INDEX 2 +#define PC_DI_INDEX_SIZE_16_BIT 0 +#define PC_DI_IGNORE_VISIBILITY 0 +#define PC_DI_PT_TRILIST 4 +#define PC_DI_SRC_SEL_IMMEDIATE 1 +#define PC_DI_INDEX_SIZE_32_BIT 1 + +#define UCHE_ENTIRE_CACHE 1 +#define UCHE_OP_INVALIDATE 1 + +/* + * The following are bit field shifts within some of the registers defined + * above. 
These are used in the context switch code in conjunction with the + * _SET macro + */ + +#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16 +#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12 +#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21 +#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19 +#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20 +#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17 +#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0 +#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0 +#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0 +#define GRAS_SC_CONTROL_RASTER_MODE 12 +#define GRAS_SC_CONTROL_RENDER_MODE 4 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0 +#define HLSQ_CTRL0REG_CHUNKDISABLE 26 +#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27 +#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6 +#define HLSQ_CTRL0REG_FSTHREADSIZE 4 +#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28 +#define HLSQ_CTRL0REG_RESERVED2 10 +#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29 +#define HLSQ_CTRL0REG_SPSHADERRESTART 9 +#define HLSQ_CTRL0REG_TPFULLUPDATE 30 +#define HLSQ_CTRL1REG_RESERVED1 9 +#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8 +#define HLSQ_CTRL1REG_VSTHREADSIZE 6 +#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26 +#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0 +#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12 +#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24 +#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24 +#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8 +#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5 +#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25 +#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0 +#define PC_DRAW_INITIATOR_PRIM_TYPE 0 +#define PC_DRAW_INITIATOR_SOURCE_SELECT 6 +#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9 +#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B +#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D +#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E +#define RB_COPYCONTROL_COPY_GMEM_BASE 14 +#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4 +#define RB_COPYDESTBASE_COPY_DEST_BASE 4 +#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14 +#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18 +#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2 +#define RB_COPYDESTINFO_COPY_DEST_TILE 0 +#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0 +#define RB_DEPTHCONTROL_Z_TEST_FUNC 4 +#define RB_MODECONTROL_RENDER_MODE 8 +#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15 +#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16 +#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21 +#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24 +#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16 +#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29 +#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5 +#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8 +#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0 +#define RB_MRTBUFBASE_COLOR_BUF_BASE 4 +#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17 +#define RB_MRTBUFINFO_COLOR_FORMAT 0 +#define RB_MRTBUFINFO_COLOR_TILE_MODE 6 +#define RB_MRTCONTROL_COMPONENT_ENABLE 24 +#define RB_MRTCONTROL_DITHER_MODE 12 +#define RB_MRTCONTROL_READ_DEST_ENABLE 3 +#define RB_MRTCONTROL_ROP_CODE 8 +#define RB_MSAACONTROL_MSAA_DISABLE 10 +#define RB_MSAACONTROL_SAMPLE_MASK 16 +#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24 +#define RB_RENDERCONTROL_BIN_WIDTH 4 +#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12 +#define RB_STENCILCONTROL_STENCIL_FAIL 11 +#define RB_STENCILCONTROL_STENCIL_FAIL_BF 23 +#define RB_STENCILCONTROL_STENCIL_FUNC 8 
+#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20 +#define RB_STENCILCONTROL_STENCIL_ZFAIL 17 +#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29 +#define RB_STENCILCONTROL_STENCIL_ZPASS 14 +#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26 +#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10 +#define SP_FSCTRLREG0_FSICACHEINVALID 2 +#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18 +#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1 +#define SP_FSCTRLREG0_FSLENGTH 24 +#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21 +#define SP_FSCTRLREG0_FSTHREADMODE 0 +#define SP_FSCTRLREG0_FSTHREADSIZE 20 +#define SP_FSCTRLREG0_PIXLODENABLE 22 +#define SP_FSCTRLREG1_FSCONSTLENGTH 0 +#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20 +#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24 +#define SP_FSMRTREG_REGID 0 +#define SP_FSOUTREG_PAD0 2 +#define SP_IMAGEOUTPUTREG_MRTFORMAT 0 +#define SP_IMAGEOUTPUTREG_PAD0 6 +#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16 +#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25 +#define SP_SHADERLENGTH_LEN 0 +#define SP_SPCTRLREG_CONSTMODE 18 +#define SP_SPCTRLREG_SLEEPMODE 20 +#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10 +#define SP_VSCTRLREG0_VSICACHEINVALID 2 +#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1 +#define SP_VSCTRLREG0_VSLENGTH 24 +#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21 +#define SP_VSCTRLREG0_VSTHREADMODE 0 +#define SP_VSCTRLREG0_VSTHREADSIZE 20 +#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24 +#define SP_VSOUTREG_COMPMASK0 9 +#define SP_VSPARAMREG_POSREGID 0 +#define SP_VSPARAMREG_PSIZEREGID 8 +#define SP_VSPARAMREG_TOTALVSOUTVAR 20 +#define SP_VSVPCDSTREG_OUTLOC0 0 +#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16 +#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8 +#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0 +#define UCHE_INVALIDATE1REG_OPCODE 0x1C +#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F +#define VFD_BASEADDR_BASEADDR 0 +#define VFD_CTRLREG0_PACKETSIZE 18 +#define VFD_CTRLREG0_STRMDECINSTRCNT 22 +#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27 +#define VFD_CTRLREG0_TOTALATTRTOVS 0 +#define VFD_CTRLREG1_MAXSTORAGE 0 +#define VFD_CTRLREG1_REGID4INST 24 +#define VFD_CTRLREG1_REGID4VTX 16 +#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4 +#define VFD_DECODEINSTRUCTIONS_FORMAT 6 +#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29 +#define VFD_DECODEINSTRUCTIONS_REGID 12 +#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24 +#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30 +#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0 +#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7 +#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0 +#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18 +#define VFD_FETCHINSTRUCTIONS_STEPRATE 24 +#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17 +#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8 +#define VFD_THREADINGTHRESHOLD_RESERVED6 4 +#define VPC_VPCATTR_LMSIZE 28 +#define VPC_VPCATTR_THRHDASSIGN 12 +#define VPC_VPCATTR_TOTALATTR 0 +#define VPC_VPCPACK_NUMFPNONPOSVAR 8 +#define VPC_VPCPACK_NUMNONPOSVSVAR 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT08 0 +#define VPC_VPCVARPSREPLMODE_COMPONENT09 2 +#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4 +#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6 +#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8 +#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10 +#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12 +#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14 +#define VPC_VPCVARPSREPLMODE_COMPONENT10 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT11 18 +#define VPC_VPCVARPSREPLMODE_COMPONENT12 20 +#define VPC_VPCVARPSREPLMODE_COMPONENT13 22 +#define VPC_VPCVARPSREPLMODE_COMPONENT14 24 +#define VPC_VPCVARPSREPLMODE_COMPONENT15 26 +#define VPC_VPCVARPSREPLMODE_COMPONENT16 28 
+#define VPC_VPCVARPSREPLMODE_COMPONENT17 30 + +#endif diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index ad164fee..e5465f46 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -156,38 +156,16 @@ static const struct { { ADRENO_REV_A225, 2, 2, 0, 5, "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, 1536, 768 }, + { ADRENO_REV_A225, 2, 2, 0, 6, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768 }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, 1536, 768 }, + { ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev }, }; -static void adreno_gmeminit(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = &adreno_dev->dev; - union reg_rb_edram_info rb_edram_info; - unsigned int gmem_size; - unsigned int edram_value = 0; - - /* make sure edram range is aligned to size */ - BUG_ON(adreno_dev->gmemspace.gpu_base & - (adreno_dev->gmemspace.sizebytes - 1)); - - /* get edram_size value equivalent */ - gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); - while (gmem_size >>= 1) - edram_value++; - - rb_edram_info.val = 0; - - rb_edram_info.f.edram_size = edram_value; - rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ - - /* must be aligned to size */ - rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); - - adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); -} - static irqreturn_t adreno_isr(int irq, void *data) { irqreturn_t result; @@ -492,7 +470,6 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) { int status = -EINVAL; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int init_reftimestamp = 0x7fffffff; device->state = KGSL_STATE_INIT; device->requested_state = KGSL_STATE_NONE; @@ -520,80 +497,22 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) kgsl_mh_start(device); - if (kgsl_mmu_start(device)) + status = kgsl_mmu_start(device); + if (status) goto error_clk_off; - /*We need to make sure all blocks are powered up and clocked before - *issuing a soft reset. The overrides will then be turned off (set to 0) - */ - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); - - /* Only reset CP block if all blocks have previously been reset */ - if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || - !adreno_is_a22x(adreno_dev)) { - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0xFFFFFFFF); - device->flags |= KGSL_FLAGS_SOFT_RESET; - } else - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000001); - - /* The core is in an indeterminate state until the reset completes - * after 30ms. 
- */ - msleep(30); - - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); - - adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); - - if (adreno_is_a225(adreno_dev)) { - /* Enable large instruction store for A225 */ - adreno_regwrite(device, REG_SQ_FLOW_CONTROL, 0x18000000); - } - - adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); - adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); - - if (cpu_is_msm8960() || cpu_is_msm8930()) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); - - if (!adreno_is_a22x(adreno_dev)) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); - - kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - - kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - init_reftimestamp); - - adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); - - /* Make sure interrupts are disabled */ - - adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); - adreno_regwrite(device, REG_CP_INT_CNTL, 0); - adreno_regwrite(device, REG_SQ_INT_CNTL, 0); - - if (adreno_is_a22x(adreno_dev)) - adreno_dev->gmemspace.sizebytes = SZ_512K; - else - adreno_dev->gmemspace.sizebytes = SZ_256K; - adreno_gmeminit(adreno_dev); + /* Start the GPU */ + adreno_dev->gpudev->start(adreno_dev); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + device->ftbl->irqctrl(device, 1); status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram); - if (status != 0) - goto error_irq_off; - + if (status == 0) { mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); - return status; + return 0; + } -error_irq_off: kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); kgsl_mmu_stop(device); error_clk_off: @@ -886,7 +805,8 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) unsigned int msecs_first; unsigned int msecs_part; - kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2, + kgsl_cffdump_regpoll(device->id, + adreno_dev->gpudev->reg_rbbm_status << 2, 0x00000000, 0x80000000); /* first, wait until the CP has consumed all the commands in * the ring buffer @@ -917,7 +837,8 @@ retry: /* now, wait for the GPU to finish its operations */ wait_time = jiffies + wait_timeout; while (time_before(jiffies, wait_time)) { - adreno_regread(device, REG_RBBM_STATUS, &rbbm_status); + adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status, + &rbbm_status); if (rbbm_status == 0x110) return 0; } @@ -1295,44 +1216,29 @@ static inline s64 adreno_ticks_to_us(u32 ticks, u32 gpu_freq) static void adreno_power_stats(struct kgsl_device *device, struct kgsl_power_stats *stats) { - unsigned int reg; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int cycles; + + /* Get the busy cycles counted since the counter was last reset */ + /* Calling this function also resets and restarts the counter */ + + cycles = adreno_dev->gpudev->busy_cycles(adreno_dev); /* In order to calculate idle you have to have run the algorithm * * at least once to get a start time. */ if (pwr->time != 0) { - s64 tmp; - /* Stop the performance moniter and read the current * - * busy cycles. 
*/ - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_FREEZE); - adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, ®); - tmp = ktime_to_us(ktime_get()); + s64 tmp = ktime_to_us(ktime_get()); stats->total_time = tmp - pwr->time; pwr->time = tmp; - stats->busy_time = adreno_ticks_to_us(reg, device->pwrctrl. + stats->busy_time = adreno_ticks_to_us(cycles, device->pwrctrl. pwrlevels[device->pwrctrl.active_pwrlevel]. gpu_freq); - - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_RESET); } else { stats->total_time = 0; stats->busy_time = 0; pwr->time = ktime_to_us(ktime_get()); } - - /* re-enable the performance moniters */ - adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); - adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); } void adreno_irqctrl(struct kgsl_device *device, int state) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index b54699ad..7c5cb7a4 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -51,6 +51,7 @@ enum adreno_gpurev { ADRENO_REV_A205 = 205, ADRENO_REV_A220 = 220, ADRENO_REV_A225 = 225, + ADRENO_REV_A320 = 320, }; struct adreno_gpudev; @@ -76,14 +77,27 @@ struct adreno_device { }; struct adreno_gpudev { + /* + * These registers are in a different location on A3XX, so define + * them in the structure and use them as variables. + */ + unsigned int reg_rbbm_status; + unsigned int reg_cp_pfp_ucode_data; + unsigned int reg_cp_pfp_ucode_addr; + + /* GPU specific function hooks */ int (*ctxt_create)(struct adreno_device *, struct adreno_context *); void (*ctxt_save)(struct adreno_device *, struct adreno_context *); void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); irqreturn_t (*irq_handler)(struct adreno_device *); void (*irq_control)(struct adreno_device *, int); + void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + void (*start)(struct adreno_device *); + unsigned int (*busy_cycles)(struct adreno_device *); }; extern struct adreno_gpudev adreno_a2xx_gpudev; +extern struct adreno_gpudev adreno_a3xx_gpudev; int adreno_idle(struct kgsl_device *device, unsigned int timeout); void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, @@ -133,7 +147,12 @@ static inline int adreno_is_a22x(struct adreno_device *adreno_dev) static inline int adreno_is_a2xx(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev <= ADRENO_REV_A225); + return (adreno_dev->gpurev <= 299); +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev >= 300); } /** diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index 499d0b4f..bc55bd51 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,6 +11,8 @@ * */ +#include + #include "kgsl.h" #include "kgsl_sharedmem.h" #include "kgsl_cffdump.h" @@ -1665,10 +1667,207 @@ static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) wmb(); } +static void a2xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + + /* ME_INIT */ + cmds = adreno_ringbuffer_allocspace(rb, 19); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18)); + /* All fields present (bits 9:0) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); + /* Disable/Enable Real-Time Stream processing (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); + + /* Instruction memory size: */ + GSL_RB_WRITE(cmds, cmds_gpu, + (adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start)); + /* Maximum Contexts */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + /* Write Confirm Interval and The CP will wait the + * wait_interval * 16 clocks between polling */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + /* NQ and External Memory Swap */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode error checking */ + GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); + /* Disable header dumping and Header dump address */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Header dump size */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_FREEZE); + + /* Get the value */ + adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &val); + + /* Reset the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_RESET); + + /* Re-Enable the performance monitors */ + adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); + adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); + + return val; +} + +static void a2xx_gmeminit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + union reg_rb_edram_info rb_edram_info; + unsigned int gmem_size; + unsigned int edram_value = 0; + + /* make sure edram range is aligned to size */ + BUG_ON(adreno_dev->gmemspace.gpu_base & + (adreno_dev->gmemspace.sizebytes - 1)); + + /* get edram_size value equivalent */ + 
gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); + while (gmem_size >>= 1) + edram_value++; + + rb_edram_info.val = 0; + + rb_edram_info.f.edram_size = edram_value; + rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ + + /* must be aligned to size */ + rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); + + adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); +} + +static void a2xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int init_reftimestamp = 0x7fffffff; + + /* + * We need to make sure all blocks are powered up and clocked + * before issuing a soft reset. The overrides will then be + * turned off (set to 0) + */ + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); + + /* + * Only reset CP block if all blocks have previously been + * reset + */ + if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || + !adreno_is_a22x(adreno_dev)) { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0xFFFFFFFF); + device->flags |= KGSL_FLAGS_SOFT_RESET; + } else { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0x00000001); + } + /* + * The core is in an indeterminate state until the reset + * completes after 30ms. + */ + msleep(30); + + adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); + + if (adreno_is_a225(adreno_dev)) { + /* Enable large instruction store for A225 */ + adreno_regwrite(device, REG_SQ_FLOW_CONTROL, + 0x18000000); + } + + adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); + + adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); + adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); + +// if (cpu_is_msm8960() || cpu_is_msm8930()) + if(0) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); + + if (!adreno_is_a22x(adreno_dev)) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); + + kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); + + kgsl_sharedmem_writel(&device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), + init_reftimestamp); + + adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); + + /* Make sure interrupts are disabled */ + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, REG_SQ_INT_CNTL, 0); + + if (adreno_is_a22x(adreno_dev)) + adreno_dev->gmemspace.sizebytes = SZ_512K; + else + adreno_dev->gmemspace.sizebytes = SZ_256K; + + a2xx_gmeminit(adreno_dev); +} + +/* Defined in adreno_a2xx_snapshot.c */ +void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, + int *remain, int hang); + struct adreno_gpudev adreno_a2xx_gpudev = { + .reg_rbbm_status = REG_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = REG_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = REG_CP_PFP_UCODE_DATA, + .ctxt_create = a2xx_drawctxt_create, .ctxt_save = a2xx_drawctxt_save, .ctxt_restore = a2xx_drawctxt_restore, .irq_handler = a2xx_irq_handler, .irq_control = a2xx_irq_control, + .rb_init = a2xx_rb_init, + .busy_cycles = a2xx_busy_cycles, + .start = a2xx_start, }; diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100755 index 00000000..cbc7bed4 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,2547 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" + +/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem + * functions. + */ + +#define _SET(_shift, _val) ((_val) << (_shift)) + +/* + **************************************************************************** + * + * Context state shadow structure: + * + * +---------------------+------------+-------------+---------------------+---+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex| + * +---------------------+------------+-------------+---------------------+---+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 5K - Command and Vertex Buffers + * 8K - Shader Instruction Shadow + * ~6K - Texture Constant Shadow + * + * + *************************************************************************** + */ + +/* Sizes of all sections in state shadow memory */ +#define ALU_SHADOW_SIZE (8*1024) /* 8KB */ +#define REG_SHADOW_SIZE (4*1024) /* 4KB */ +#define CMD_BUFFER_SIZE (5*1024) /* 5KB */ +#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */ +#define TEX_SIZE_MIPMAP 1936 /* bytes */ +#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */ +#define TEX_SHADOW_SIZE \ + ((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \ + TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */ +#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */ + +/* Total context size, excluding GMEM shadow */ +#define CONTEXT_SIZE \ + (ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \ + CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \ + TEX_SHADOW_SIZE) + +/* Offsets to different sections in context shadow memory */ +#define REG_OFFSET ALU_SHADOW_SIZE +#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE) +#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE) +#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE) +#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET +#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) +#define FS_TEX_OFFSET_MEM_OBJECTS \ + (VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ) +#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) + +/* The offset for fragment shader data in HLSQ context */ +#define SSIZE (16*1024) + +#define HLSQ_SAMPLER_OFFSET 0x000 +#define HLSQ_MEMOBJ_OFFSET 0x400 +#define HLSQ_MIPMAP_OFFSET 0x800 + +#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM +/* Use shadow RAM */ +#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2) +#else +/* Use working RAM */ +#define HLSQ_SHADOW_BASE 0x10000 +#endif + +#define REG_TO_MEM_LOOP_COUNT_SHIFT 15 + +#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \ + vis_cull_mode) \ + (((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \ + ((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \ + ((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \ + ((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \ + ((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) 
| \ + (1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE)) + +/* + * List of context registers (starting from dword offset 0x2000). + * Each line contains start and end of a range of registers. + */ +static const unsigned int context_register_ranges[] = { + A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL, + A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ, + A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE, + A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE, + A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET, + A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL, + A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL, + A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR, + A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR, + A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3, + A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO, + A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL, + A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL, + A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, + A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX, + A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, + A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG, + A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG, + A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, + A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG, + A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD, + A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG, + A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7, + A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG, + A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG, + A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, + A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG, + A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3, + A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG, + A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1, +}; + +/* Global registers that need to be saved separately */ +static const unsigned int global_registers[] = { + A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0, + A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0, + A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1, + A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1, + A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2, + A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2, + A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3, + A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3, + A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4, + A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4, + A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5, + A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5, + A3XX_VSC_BIN_SIZE, + A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1, + A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3, + A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5, + A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7, + A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_SIZE_ADDRESS +}; + +#define 
GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers) + +/* A scratchpad used to build commands during context create */ +static struct tmp_ctx { + unsigned int *cmd; /* Next available dword in C&V buffer */ + + /* Addresses in comamnd buffer where registers are saved */ + uint32_t reg_values[GLOBAL_REGISTER_COUNT]; + uint32_t gmem_base; /* Base GPU address of GMEM */ +} tmp_ctx; + +#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC +/* + * Function for executing dest = ( (reg & and) ROL rol ) | or + */ +static unsigned int *rmw_regtomem(unsigned int *cmd, + unsigned int reg, unsigned int and, + unsigned int rol, unsigned int or, + unsigned int dest) +{ + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = reg; /* OR address */ + + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2; + *cmd++ = and; /* AND value */ + *cmd++ = or; /* OR value */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = dest; + + return cmd; +} +#endif + +static void build_regconstantsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int i; + + drawctxt->constant_save_commands[0].hostptr = cmd; + drawctxt->constant_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + cmd++; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* + * Context registers are already shadowed; just need to + * disable shadowing to prevent corruption. + */ + + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + +#else + /* + * Make sure the HW context has the correct register values before + * reading them. + */ + + /* Write context registers into shadow */ + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + unsigned int start = context_register_ranges[i * 2]; + unsigned int end = context_register_ranges[i * 2 + 1]; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + start; + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) + & 0xFFFFE000) + (start - 0x2000) * 4; + } +#endif + + /* Need to handle some of the global registers separately */ + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = global_registers[i]; + *cmd++ = tmp_ctx.reg_values[i]; + } + + /* Save vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[1].hostptr = cmd; + drawctxt->constant_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000) / 4 + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
+ */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + /* ALU constant shadow base */ + *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc; + + /* Save fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[2].hostptr = cmd; + drawctxt->constant_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4 + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + + /* + From fixup: + + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + dst = base + offset + Because of the base alignment we can use + dst = base | offset + */ + *cmd++ = 0; /* dst */ + + /* Save VS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save VS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save VS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Save FS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save FS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save FS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* Copy GMEM contents to system memory shadow. 
*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) | + _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL); + /* RB_COPY_CONTROL */ + *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE, + RB_CLEAR_MODE_RESOLVE) | + _SET(RB_COPYCONTROL_COPY_GMEM_BASE, + tmp_ctx.gmem_base >> 14); + /* RB_COPY_DEST_BASE */ + *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE, + shadow->gmemshadow.gpuaddr >> 5); + /* RB_COPY_DEST_PITCH */ + *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH, + (shadow->pitch * 4) / 32); + /* RB_COPY_DEST_INFO */ + *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE, + RB_TILINGMODE_LINEAR) | + _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) | + _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices.gpuaddr); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_RESERVED2, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) | + _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) | + _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL1REG_RESERVED1, 4); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 
31); + /* HLSQ_CONTROL_3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REQ */ + *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) | + _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) | + _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252); + /* SP_VS_OUT_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 1); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) | + _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG_1 */ + *cmds++ = 
0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUTPUT_REG */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG_0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 1); + /* SP_FS_MRT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_3 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */ + *cmds++ = 0x00000005; *cmds++ = 0x30044b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30244b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLEND_CONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + 
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLEND_CONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLEND_CONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLEND_CONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INSTANCEID_OFFSET */ + *cmds++ = 0x00000000; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CNTL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* 
GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ); + /* GRAS_CL_GB_CLIP_ADJ */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Resolve using two draw calls with a dummy register + * write in between. This is a HLM workaround + * that should be removed later. 
+ */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000000; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000002; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000002; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000003; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +static void build_shader_save_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start; + + /* Reserve space for boolean values used for COND_EXEC packet */ + drawctxt->cond_execs[0].hostptr = cmd; + drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[1].hostptr = cmd; + drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + drawctxt->shader_save_commands[0].hostptr = cmd; + drawctxt->shader_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->shader_save_commands[1].hostptr = cmd; + drawctxt->shader_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + start = cmd; + + /* Save vertex shader */ + + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[2].hostptr = cmd; + drawctxt->shader_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8 + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + src = (HLSQ_SHADOW_BASE + 0x1000)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Save fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[3].hostptr = cmd; + drawctxt->shader_save_commands[3].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8 + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. 
+ + fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32 + From regspec: + + SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]: + First instruction of the whole shader will be stored from + the offset in instruction cache, unit = 256bits, a cache line. + It can start from 0 if no VS available. + + src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* + * Make an IB to modify context save IBs with the correct shader instruction + * and constant sizes and offsets. + */ + +static void build_save_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Flush HLSQ lazy updates */ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + /* Make sure registers are flushed */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[2].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Save shader offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->shader_save_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[1].gpuaddr; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[2].gpuaddr; + + /* Save FS constant offset */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_save_commands[0].gpuaddr; + + + /* Save VS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[0].gpuaddr; + + /* Save FS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[1].gpuaddr; +#else + + /* Shader save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 11+REG_TO_MEM_LOOP_COUNT_SHIFT, + (HLSQ_SHADOW_BASE + 0x1000) / 4, + drawctxt->shader_save_commands[2].gpuaddr); + + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */ + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 ) + | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) | + 
A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x7f000000; /* AND value */ + *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */ + + /* + * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) | + * SP_FS_OBJ_OFFSET_REG + */ + + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */ + /* + * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) | + * 0x00000000 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = A3XX_CP_SCRATCH_REG3; + *cmd++ = 0xfe000000; /* AND value */ + *cmd++ = 0x00000000; /* OR value */ + /* + * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0xffffffff; /* AND value */ + *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Constant save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000) / 4, + drawctxt->constant_save_commands[1].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4, + drawctxt->constant_save_commands[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, drawctxt->gpustate.gpuaddr & 0xfffffe00, + drawctxt->constant_save_commands[2].gpuaddr + + sizeof(unsigned int)); + + /* Modify constant save conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save VS instruction store mode */ + + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[0].gpuaddr); + + /* Save FS instruction store mode */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[1].gpuaddr); + +#endif + + create_ib1(drawctxt, drawctxt->save_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/****************************************************************************/ +/* Functions to build context restore IBs */ +/****************************************************************************/ + +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31); + /* HLSQ_CONTROL3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0); + /* RB_MRT_BUF_INFO0 */ + *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) | + _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH, + (shadow->gmem_pitch * 4 * 8) / 256); + /* RB_MRT_BUF_BASE0 */ + *cmds++ 
= _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5); + + /* Texture samplers */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00000240; + *cmds++ = 0x00000000; + + /* Texture memobjs */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x4cc06880; + *cmds++ = shadow->height | (shadow->width << 14); + *cmds++ = (shadow->pitch*4*8) << 9; + *cmds++ = 0x00000000; + + /* Mipmap bases */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16); + *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = shadow->gmemshadow.gpuaddr; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252) | + _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1); + /* SP_VS_OUT_REG0 */ + *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3); + /* SP_VS_OUT_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG0 */ + *cmds++ 
= _SET(SP_VSVPCDSTREG_OUTLOC0, 8); + /* SP_VS_VPC_DST_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) | + _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 2); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG1 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUT_REG */ + *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 4); + /* SP_FS_MRT_REG1 */ + *cmds++ = 0; + /* SP_FS_MRT_REG2 */ + *cmds++ = 0; + /* SP_FS_MRT_REG3 */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) | + _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) | + _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2); + /* VPC_VARYING_INTERP_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_IINTERP_MODE3 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_PS_REPL_MODE_0 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_1 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + 
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_2 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_3 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + /* Load vertex shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)end; */ + *cmds++ = 0x00000000; *cmds++ = 0x13000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + /* Load fragment shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */ + *cmds++ = 0x00002000; *cmds++ = 0x57368902; + /* (rpt5)nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000500; + /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */ + *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00; + /* (sy)mov.f32f32 r1.x, r0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30044004; + /* mov.f32f32 r1.y, r0.y; */ + *cmds++ = 0x00000001; *cmds++ = 0x20044005; + /* 
mov.f32f32 r1.z, r0.z; */ + *cmds++ = 0x00000002; *cmds++ = 0x20044006; + /* mov.f32f32 r1.w, r0.w; */ + *cmds++ = 0x00000003; *cmds++ = 0x20044007; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) | + _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr); + /* VFD_FETCH_INSTR_0_1 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_1 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr + 16); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1); + /* VFD_DECODE_INSTR_1 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL); + /* RB_STENCIL_CONTROL */ + *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + 
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLENDCONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLENDCONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLENDCONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLENDCONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = 
_SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL); + /* GRAS_SU_MODE_CONTROL */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0); + /* SP_FS_IMAGE_OUTPUT_REG_0 */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CONTROL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST, + PC_DI_SRC_SEL_AUTO_INDEX, + PC_DI_INDEX_SIZE_16_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000002; /* Num indices */ + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + unsigned int *lcc_start; + + int i; + + /* Flush HLSQ lazy updates 
*/ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + lcc_start = cmd; + + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + cmd = reg_range(cmd, context_register_ranges[i * 2], + context_register_ranges[i * 2 + 1]); + } + + lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, + (cmd - lcc_start) - 1); + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + lcc_start[2] |= (1 << 24) | (4 << 16); +#endif + + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type0_packet(global_registers[i], 1); + tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + } + + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_constantrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int mode = 4; /* Indirect mode */ + unsigned int stateblock; + unsigned int numunits; + unsigned int statetype; + + drawctxt->cond_execs[2].hostptr = cmd; + drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[3].hostptr = cmd; + drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; + *cmd++ = 0x0; +#endif + /* HLSQ full update */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */ + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Re-enable shadowing */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = (4 << 16) | (1 << 24); + *cmd++ = 0x0; +#endif + + /* Load vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[0].hostptr = cmd; + drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd, + &drawctxt->gpustate); + + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex constants) + numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
+ + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + *cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1; + + /* Load fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[1].hostptr = cmd; + drawctxt->constant_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment constants) + numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + drawctxt->constant_load_commands[2].hostptr = cmd; + drawctxt->constant_load_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + ord2 = base + offset | 1 + Because of the base alignment we can use + ord2 = base | offset | 1 + */ + *cmd++ = 0; /* ord2 */ + + /* Restore VS texture memory objects */ + stateblock = 0; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore VS texture mipmap addresses */ + stateblock = 1; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore VS texture sampler objects */ + stateblock = 0; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + /* Restore FS texture memory objects */ + stateblock = 2; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore FS texture mipmap addresses */ + stateblock = 3; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore FS texture sampler objects */ + stateblock = 2; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + create_ib1(drawctxt, drawctxt->constant_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void 
build_shader_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Vertex shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[0].hostptr = cmd; + drawctxt->shader_load_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex shader) + numunits = SP_VS_CTRL_REG0.VS_LENGTH + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[1].hostptr = cmd; + drawctxt->shader_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment shader) + numunits = SP_FS_CTRL_REG0.FS_LENGTH + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + create_ib1(drawctxt, drawctxt->shader_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd; + drawctxt->hlsqcontrol_restore_commands[0].gpuaddr + = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* IB that modifies the shader and constant sizes and offsets in restore IBs. 
*/ +static void build_restore_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[1].gpuaddr; + + /* Save constant offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_load_commands[2].gpuaddr; +#else + /* Save shader sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 30, (4 << 19) | (4 << 16), + drawctxt->shader_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000, + 30, (6 << 19) | (4 << 16), + drawctxt->shader_load_commands[1].gpuaddr); + + /* Save constant sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 23, (4 << 19) | (4 << 16), + drawctxt->constant_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 23, (6 << 19) | (4 << 16), + drawctxt->constant_load_commands[1].gpuaddr); + + /* Modify constant restore conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save fragment constant shadow offset */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1, + drawctxt->constant_load_commands[2].gpuaddr); +#endif + + /* Use mask value to avoid flushing HLSQ which would cause the HW to + discard all the shader data */ + + cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff, + 0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr); + + create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + build_regrestore_cmds(adreno_dev, drawctxt); + build_constantrestore_cmds(adreno_dev, drawctxt); + build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt); + build_regconstantsave_cmds(adreno_dev, drawctxt); + build_shader_save_cmds(adreno_dev, drawctxt); + build_shader_restore_cmds(adreno_dev, drawctxt); + build_restore_fixup_cmds(adreno_dev, drawctxt); + build_save_fixup_cmds(adreno_dev, drawctxt); + + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) { + int result = + kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, + drawctxt->context_gmem_shadow.size); + + if (result) + return result; + } else { + 
memset(&drawctxt->context_gmem_shadow.gmemshadow, 0, + sizeof(drawctxt->context_gmem_shadow.gmemshadow)); + + return 0; + } + + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* Do we need to idle? */ + /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */ + + tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + return 0; +} + +static int a3xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more than just storage for + * the gpustate. This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET; + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) + ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* Fixup self modifying IBs for save operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->save_fixup, 3); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->regconstant_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* Save shader instructions */ + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* + * Save GMEM (note: changes shader. shader must + * already be saved.) + */ + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow.
+ gmem_save, 3); + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default pagetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_DEVICE_MEMSTORE_OFFSET(current_context); + cmds[4] = (unsigned int)context; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + + /* + * Restore GMEM. (note: changes shader. + * Shader must not already be restored.) + */ + + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. + gmem_restore, 3); + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_restore, 3); + + /* Fixup self modifying IBs for restore operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->restore_fixup, 3); + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->constant_restore, 3); + + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->shader_restore, 3); + + /* Restore HLSQ_CONTROL_0 register */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->hlsqcontrol_restore, 3); + } +} + +static void a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + cmds = adreno_ringbuffer_allocspace(rb, 18); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17)); + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150); + GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode control - turned off for A3XX */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + const char *err = ""; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + unsigned int reg; + + adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0x3); + + /* Clear the error */ + adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + return; + } + case A3XX_INT_RBBM_REG_TIMEOUT: + err = "RBBM: AHB register timeout"; + break; + case A3XX_INT_RBBM_ME_MS_TIMEOUT: + err = "RBBM: ME master split timeout"; + break; + case A3XX_INT_RBBM_PFP_MS_TIMEOUT: + err = "RBBM: PFP master split timeout"; + break; + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + err = "RBBM: ATB bus oveflow"; + break; + case A3XX_INT_VFD_ERROR: + err = "VFD: Out of bounds access"; + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + err = "ringbuffer TO packet in IB interrupt"; + break; + case A3XX_INT_CP_OPCODE_ERROR: + err = "ringbuffer opcode error interrupt"; + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + err = "ringbuffer reserved bit error interrupt"; + break; + case A3XX_INT_CP_HW_FAULT: + err = "ringbuffer hardware fault"; + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + err = "ringbuffer protected mode error interrupt"; + break; + case A3XX_INT_CP_AHB_ERROR_HALT: + err = "ringbuffer AHB error interrupt"; + break; + case A3XX_INT_MISC_HANG_DETECT: + err = "MISC: GPU hang detected"; + break; + case A3XX_INT_UCHE_OOB_ACCESS: + err = "UCHE: Out of bounds access"; + break; + } + + KGSL_DRV_CRIT(device, "%s\n", err); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); +} + +static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) +{ + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + + if (irq == A3XX_INT_CP_RB_INT) { + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0); + wmb(); + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + wake_up_interruptible_all(&rb->device->wait_queue); + + /* Schedule work to free mem and issue ibs */ + queue_work(rb->device->work_queue, &rb->device->ts_expired_ws); + + atomic_notifier_call_chain(&rb->device->ts_notifier_list, + rb->device->id, NULL); +} + +#define A3XX_IRQ_CALLBACK(_c) { .func = _c } + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_VFD_ERROR) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_MISC_HANG_DETECT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct { + void (*func)(struct adreno_device *, int); +} a3xx_irq_funcs[] = { + A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */ + A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - 
CP_HW_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */ + A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 24 - MISC_HANG_DETECT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + /* 26 to 31 - Unused */ +}; + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t ret = IRQ_NONE; + unsigned int status, tmp; + int i; + + adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status); + + for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) { + if (tmp & 1) { + if (a3xx_irq_funcs[i].func != NULL) { + a3xx_irq_funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else { + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + } + } + + tmp >>= 1; + } + + if (status) + adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD, + status); + return ret; +} + +static void a3xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK); + else + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0); +} + +static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regread(device, A3XX_RBBM_RBBM_CTL, ®); + reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Read the value */ + adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val); + + /* Reset the counter */ + reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Re-enable the counter */ + reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1; + reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + return val; +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* Reset the core */ + adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, + 0x00000001); + msleep(20); + + /* + * enable fixed master AXI port of 0x0 for all clients to keep + * traffic from going to random places + */ + + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Enable the RBBM error reporting bits. 
This lets us get + useful information on failure */ + + adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000); +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_rbbm_status = A3XX_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA, + + .ctxt_create = a3xx_drawctxt_create, + .ctxt_save = a3xx_drawctxt_save, + .ctxt_restore = a3xx_drawctxt_restore, + .rb_init = a3xx_rb_init, + .irq_control = a3xx_irq_control, + .irq_handler = a3xx_irq_handler, + .busy_cycles = a3xx_busy_cycles, + .start = a3xx_start, +}; diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index 218c4c09..87f9efe4 100755 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -19,6 +19,7 @@ /* quad for copying GMEM to context shadow */ #define QUAD_LEN 12 +#define QUAD_RESTORE_LEN 14 static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000, @@ -27,6 +28,14 @@ static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000 }; +static unsigned int gmem_restore_quad[QUAD_RESTORE_LEN] = { + 0x00000000, 0x3f800000, 0x3f800000, + 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x3f800000, +}; + #define TEXCOORD_LEN 8 static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = { @@ -73,12 +82,12 @@ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) gmem_copy_quad[4] = uint2float(shadow->height); gmem_copy_quad[9] = uint2float(shadow->width); - gmem_copy_quad[0] = 0; - gmem_copy_quad[6] = 0; - gmem_copy_quad[7] = 0; - gmem_copy_quad[10] = 0; + gmem_restore_quad[5] = uint2float(shadow->height); + gmem_restore_quad[7] = uint2float(shadow->width); memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2); + memcpy(shadow->quad_vertices_restore.hostptr, gmem_copy_quad, + QUAD_RESTORE_LEN << 2); memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, TEXCOORD_LEN << 2); @@ -103,6 +112,13 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, cmd += QUAD_LEN; + /* Used by A3XX, but define for both to make the code easier */ + shadow->quad_vertices_restore.hostptr = cmd; + shadow->quad_vertices_restore.gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + + cmd += QUAD_RESTORE_LEN; + /* tex coord buffer location (in GPU space) */ shadow->quad_texcoords.hostptr = cmd; shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h index 26ff5344..50ee3450 100755 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -50,37 +50,56 @@ struct kgsl_context; struct gmem_shadow_t { struct kgsl_memdesc gmemshadow; /* Shadow buffer address */ - /* 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x - * 256 rows. */ - /* width & height must be a multiples of 32, in case tiled textures - * are used. */ - enum COLORFORMATX format; + /* + * 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x + * 256 rows. 
Width & height must be multiples of 32 in case tiled + * textures are used + */ + + enum COLORFORMATX format; /* Unused on A3XX */ unsigned int size; /* Size of surface used to store GMEM */ unsigned int width; /* Width of surface used to store GMEM */ unsigned int height; /* Height of surface used to store GMEM */ unsigned int pitch; /* Pitch of surface used to store GMEM */ unsigned int gmem_pitch; /* Pitch value used for GMEM */ - unsigned int *gmem_save_commands; - unsigned int *gmem_restore_commands; + unsigned int *gmem_save_commands; /* Unused on A3XX */ + unsigned int *gmem_restore_commands; /* Unused on A3XX */ unsigned int gmem_save[3]; unsigned int gmem_restore[3]; struct kgsl_memdesc quad_vertices; struct kgsl_memdesc quad_texcoords; + struct kgsl_memdesc quad_vertices_restore; }; struct adreno_context { uint32_t flags; struct kgsl_pagetable *pagetable; struct kgsl_memdesc gpustate; - unsigned int reg_save[3]; unsigned int reg_restore[3]; unsigned int shader_save[3]; - unsigned int shader_fixup[3]; unsigned int shader_restore[3]; - unsigned int chicken_restore[3]; - unsigned int bin_base_offset; + /* Information of the GMEM shadow that is created in context create */ struct gmem_shadow_t context_gmem_shadow; + + /* A2XX specific items */ + unsigned int reg_save[3]; + unsigned int shader_fixup[3]; + unsigned int chicken_restore[3]; + unsigned int bin_base_offset; + + /* A3XX specific items */ + unsigned int regconstant_save[3]; + unsigned int constant_restore[3]; + unsigned int hlsqcontrol_restore[3]; + unsigned int save_fixup[3]; + unsigned int restore_fixup[3]; + struct kgsl_memdesc shader_load_commands[2]; + struct kgsl_memdesc shader_save_commands[4]; + struct kgsl_memdesc constant_save_commands[3]; + struct kgsl_memdesc constant_load_commands[3]; + struct kgsl_memdesc cond_execs[4]; + struct kgsl_memdesc hlsqcontrol_restore_commands[1]; }; int adreno_drawctxt_create(struct kgsl_device *device, diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h old mode 100644 new mode 100755 index 8aea58c9..1dffc321 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -157,6 +157,18 @@ #define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 /* packet header building macros */ #define cp_type0_packet(regindx, cnt) \ diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index e5c28d71..8e47927f 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -24,28 +24,11 @@ #include "adreno_ringbuffer.h" #include "a2xx_reg.h" +#include "a3xx_reg.h" #define GSL_RB_NOP_SIZEDWORDS 2 -/* protected mode error checking below register address 0x800 -* note: if CP_INTERRUPT packet is used then checking needs -* to change to below register address 0x7C8 -*/ -#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 -/* Firmware file names - * Legacy names must remain but replacing macro names to - * match current kgsl model. - * a200 is yamato - * a220 is leia - */ -#define A200_PFP_FW "yamato_pfp.fw" -#define A200_PM4_FW "yamato_pm4.fw" -#define A220_PFP_470_FW "leia_pfp_470.fw" -#define A220_PM4_470_FW "leia_pm4_470.fw" -#define A225_PFP_FW "a225_pfp.fw" -#define A225_PM4_FW "a225_pm4.fw" - -static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) { BUG_ON(rb->wptr == 0); @@ -104,8 +87,7 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, } while ((freecmds != 0) && (freecmds <= numcmds)); } - -static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, unsigned int numcmds) { unsigned int *ptr = NULL; @@ -231,9 +213,10 @@ static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n", adreno_dev->pfp_fw[0]); - adreno_regwrite(device, REG_CP_PFP_UCODE_ADDR, 0); + adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0); for (i = 1; i < adreno_dev->pfp_fw_size; i++) - adreno_regwrite(device, REG_CP_PFP_UCODE_DATA, + adreno_regwrite(device, + adreno_dev->gpudev->reg_cp_pfp_ucode_data, adreno_dev->pfp_fw[i]); err: return ret; @@ -244,10 +227,9 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) int status; /*cp_rb_cntl_u cp_rb_cntl; */ union reg_cp_rb_cntl cp_rb_cntl; - unsigned int *cmds, rb_cntl; + unsigned int rb_cntl; struct kgsl_device *device = rb->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - uint cmds_gpu; if (rb->flags & KGSL_FLAGS_STARTED) return 0; @@ -263,12 +245,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) kgsl_sharedmem_set(&rb->buffer_desc, 0, 0xAA, (rb->sizedwords << 2)); + if (adreno_is_a2xx(adreno_dev)) { adreno_regwrite(device, REG_CP_RB_WPTR_BASE, (rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_WPTRPOLL_OFFSET)); /* setup WPTR delay */ - adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, 0 /*0x70000010 */); + adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, + 0 /*0x70000010 */); + } /*setup REG_CP_RB_CNTL */ adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl); @@ -287,7 +272,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) */ cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3); - cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; /* WPTR polling */ + if (adreno_is_a2xx(adreno_dev)) { + /* WPTR polling */ + cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; + } + /* mem RPTR writebacks */ cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE; @@ -299,8 +288,36 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_RPTR_OFFSET); + if (adreno_is_a3xx(adreno_dev)) { + /* enable access protection to privileged registers */ + adreno_regwrite(device, 
A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_3, 0x60000108); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400); + + /* CP registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180); + + /* RB registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300); + + /* VBIF registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000); + } + + if (adreno_is_a2xx(adreno_dev)) { /* explicitly clear all cp interrupts */ adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF); + } /* setup scratch/timestamp */ adreno_regwrite(device, REG_SCRATCH_ADDR, @@ -334,55 +351,8 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) /* clear ME_HALT to start micro engine */ adreno_regwrite(device, REG_CP_ME_CNTL, 0); - /* ME_INIT */ - cmds = adreno_ringbuffer_allocspace(rb, 19); - cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); - - GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT); - /* All fields present (bits 9:0) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); - /* Disable/Enable Real-Time Stream processing (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); - - /* Instruction memory size: */ - GSL_RB_WRITE(cmds, cmds_gpu, - (adreno_encode_istore_size(adreno_dev) - | adreno_dev->pix_shader_start)); - /* Maximum Contexts */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); - /* Write Confirm Interval and The CP will wait the - * wait_interval * 16 clocks between polling */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - /* NQ and External Memory Swap */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Protected mode error checking */ - GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); - /* Disable header dumping and Header dump address */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Header dump size */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - adreno_ringbuffer_submit(rb); + /* ME init is GPU specific, so jump into the sub-function */ + adreno_dev->gpudev->rb_init(adreno_dev, rb); /* idle device to validate ME INIT */ status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); @@ -463,6 +433,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, unsigned int flags, unsigned int *cmds, int sizedwords) { + struct adreno_device *adreno_dev = 
ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; unsigned int total_sizedwords = sizedwords + 6; @@ -476,6 +447,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0; + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 7; + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); @@ -509,6 +483,21 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, /* start-of-pipeline and end-of-pipeline timestamps */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + + if (adreno_is_a3xx(adreno_dev)) { + /* + * Flush HLSQ lazy updates to make sure there are no + * resources pending for indirect loads after the timestamp + */ + + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); + } + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -532,6 +521,15 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_SET_CONSTANT, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, + (0x4<<16)|(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); + } + adreno_ringbuffer_submit(rb); /* return timestamp of issued coREG_ands */ diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index 4494a1bc..9c048b14 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -105,6 +104,13 @@ struct adreno_ringbuffer { #define GSL_RB_CNTL_POLL_EN 0x0 /* disable */ +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, @@ -126,6 +132,8 @@ void adreno_ringbuffer_issuecmds(struct kgsl_device *device, unsigned int *cmdaddr, int sizedwords); +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb); + void kgsl_cp_intrcallback(struct kgsl_device *device); int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, @@ -136,6 +144,9 @@ void adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, int num_rb_contents); +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, unsigned int rptr) { From 411b4bcb906fc29b7c4776fa564ec8aa6329834c Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:12:41 +0800 Subject: [PATCH 058/155] reduced the PMEM_ADSP size as the HW decoder still can't work on HD2 --- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 1a4cae6b..8d633974 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -43,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_SIZE 0x01800000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 From 1b6fa28430fae9a660977c9d6ac38e0c8064fda6 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:14:12 +0800 Subject: [PATCH 059/155] msm: kgsl: Update the GMEM and istore size for A320 Set the correct GMEM and istore sizes for A320 on APQ8064. The more GMEM we have the happier we are, so the code will work with 256K, but it will be better with 512K. For the instruction store the size is important during GPU snapshot and postmortem dump. Also, the size of each instruction is different on A3XX so remove the hard coded constants and add a GPU specific size variable. 
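For illustration only, not part of the applied diff: with the per-GPU instruction_size field this patch introduces, the shader shadow works out to istore_size x instruction_size dwords. A minimal sketch of that arithmetic, using the values from the adreno_gpulist table in this patch; the helper name is hypothetical:

/*
 * Hypothetical helper mirroring the sizing this patch enables:
 * istore entries * dwords per instruction * 4 bytes per dword.
 */
static unsigned int shader_shadow_bytes(unsigned int istore_size,
					unsigned int instruction_size)
{
	return istore_size * instruction_size * sizeof(unsigned int);
}

/*
 * Worked examples from the table below:
 *   A225: 1536 * 3 * 4 = 18432 bytes
 *   A320:  512 * 2 * 4 =  4096 bytes
 */
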
--- drivers/gpu/msm/adreno.c | 18 +++++++++++------- drivers/gpu/msm/adreno.h | 7 ++----- drivers/gpu/msm/adreno_a2xx.c | 3 ++- drivers/gpu/msm/adreno_a3xx.c | 3 +++ drivers/gpu/msm/adreno_debugfs.c | 3 ++- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index e5465f46..8f35effd 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -139,31 +139,35 @@ static const struct { struct adreno_gpudev *gpudev; unsigned int istore_size; unsigned int pix_shader_start; + unsigned int instruction_size; /* Size of an instruction in dwords */ } adreno_gpulist[] = { { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, - 512, 384}, + 512, 384, 3}, { ADRENO_REV_A205, 0, 1, 0, ANY_ID, "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, - 512, 384}, + 512, 384, 3}, { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev, - 512, 384}, + 512, 384, 3}, /* * patchlevel 5 (8960v2) needs special pm4 firmware to work around * a hardware problem. */ { ADRENO_REV_A225, 2, 2, 0, 5, "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768 }, + 1536, 768, 3 }, { ADRENO_REV_A225, 2, 2, 0, 6, "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768 }, + 1536, 768, 3 }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768 }, + 1536, 768, 3 }, + /* A3XX doesn't use the pix_shader_start */ { ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID, - "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev }, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2 }, + }; static irqreturn_t adreno_isr(int irq, void *data) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 7c5cb7a4..3bf9b3b5 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -39,11 +39,7 @@ #define ADRENO_DEFAULT_PWRSCALE_POLICY NULL #endif -/* - * constants for the size of shader instructions - */ -#define ADRENO_ISTORE_BYTES 12 -#define ADRENO_ISTORE_WORDS 3 +#define ADRENO_ISTORE_START 0x5000 /* Istore offset */ enum adreno_gpurev { ADRENO_REV_UNKNOWN = 0, @@ -74,6 +70,7 @@ struct adreno_device { unsigned int wait_timeout; unsigned int istore_size; unsigned int pix_shader_start; + unsigned int instruction_size; }; struct adreno_gpudev { diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index bc55bd51..8486aae2 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -94,7 +94,8 @@ static inline int _shader_shadow_size(struct adreno_device *adreno_dev) { - return adreno_dev->istore_size*ADRENO_ISTORE_BYTES; + return adreno_dev->istore_size * + (adreno_dev->instruction_size * sizeof(unsigned int)); } static inline int _context_size(struct adreno_device *adreno_dev) diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c index cbc7bed4..60e870f4 100755 --- a/drivers/gpu/msm/adreno_a3xx.c +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -2502,6 +2502,9 @@ static void a3xx_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; + /* GMEM size on A320 is 512K */ + adreno_dev->gmemspace.sizebytes = SZ_512K; + /* Reset the core */ adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, 0x00000001); diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 419ce9d2..9a136699 100755 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -239,7 +239,8 @@ static ssize_t 
kgsl_istore_read( return 0; adreno_dev = ADRENO_DEVICE(device); - count = adreno_dev->istore_size * ADRENO_ISTORE_WORDS; + count = adreno_dev->istore_size * adreno_dev->instruction_size; + remaining = count; for (i = 0; i < count; i += rowc) { unsigned int vals[rowc]; From 543247cd01b74a9a2f8efb908569c64ba614a97f Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:35:35 +0800 Subject: [PATCH 060/155] msm: kgsl: return correct error code for unknown ioctls Unknown ioctl code errors are supposed to be ENOIOCTLCMD, not EINVAL. --- drivers/gpu/msm/adreno.c | 3 ++- drivers/gpu/msm/kgsl.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 8f35effd..56efea08 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -415,6 +415,7 @@ adreno_identify_gpu(struct adreno_device *adreno_dev) adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw; adreno_dev->istore_size = adreno_gpulist[i].istore_size; adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start; + adreno_dev->instruction_size = adreno_gpulist[i].instruction_size; } static int __devinit @@ -1204,7 +1205,7 @@ static long adreno_ioctl(struct kgsl_device_private *dev_priv, default: KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - result = -EINVAL; + result = -ENOIOCTLCMD; break; } return result; diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 65efe2f4..50fb5515 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1964,7 +1964,7 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (!func) { KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - ret = -EINVAL; + ret = -ENOIOCTLCMD; goto done; } lock = 1; From 93d86da2ee26355495eedd7a75020692e25f36c3 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:41:35 +0800 Subject: [PATCH 061/155] msm: kgsl: handle all indirect buffer types in postmortem Postmortem dump was not parsing CP_INDIRECT_BUFFER_PFE commands. Snapshot was recently fixed to handle this, and this change extends support to postmortem dump. 
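For illustration only, not part of the applied diff: the adreno_cmd_is_ib() helper added below matches any of the four CP indirect-buffer opcodes, so a dump loop only needs one check before reading the address/size pair that follows the packet header. A sketch of such a loop in the style of adreno_dump(), assuming its rb_copy, num_item, device and cur_pt_base locals; dump_ib() is a hypothetical handler:

	for (read_idx = 0; read_idx + 2 < num_item; read_idx++) {
		unsigned int cmd = rb_copy[read_idx];

		if (adreno_cmd_is_ib(cmd)) {
			/* A type3 IB packet is followed by gpuaddr, sizedwords */
			uint32_t ib_addr = rb_copy[read_idx + 1];
			uint32_t ib_size = rb_copy[read_idx + 2];

			dump_ib(device, cur_pt_base, ib_addr, ib_size);
			read_idx += 2;
		}
	}
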
--- drivers/gpu/msm/adreno_pm4types.h | 17 +++++++++++++++++ drivers/gpu/msm/adreno_postmortem.c | 7 +++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h index 1dffc321..75512d05 100755 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -163,6 +163,13 @@ #define CP_LOAD_STATE 0x30 /* load high level sequencer command */ +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + #define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 #define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 #define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 @@ -202,4 +209,14 @@ /* gmem command buffer length */ #define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(unsigned int cmd) +{ + return (cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFD, 2)); +} + #endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c index b9b97377..2980e303 100755 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -288,7 +288,7 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, for (i = 0; i+3 < ib1_size; ) { value = ib1_addr[i++]; - if (value == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(value)) { uint32_t ib2_base = ib1_addr[i++]; uint32_t ib2_size = ib1_addr[i++]; @@ -714,7 +714,7 @@ static int adreno_dump(struct kgsl_device *device) i = 0; for (read_idx = 0; read_idx < num_item; ) { uint32_t this_cmd = rb_copy[read_idx++]; - if (this_cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx++]; uint32_t ib_size = rb_copy[read_idx++]; dump_ib1(device, cur_pt_base, (read_idx-3)<<2, ib_addr, @@ -746,8 +746,7 @@ static int adreno_dump(struct kgsl_device *device) for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY; read_idx >= 0; --read_idx) { uint32_t this_cmd = rb_copy[read_idx]; - if (this_cmd == cp_type3_packet( - CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx+1]; uint32_t ib_size = rb_copy[read_idx+2]; if (ib_size && cp_ib1_base == ib_addr) { From 41513329a1f1e396a383d61f808b578a87561cc9 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:47:35 +0800 Subject: [PATCH 062/155] msm: kgsl: Detach memory objects from a process ahead of destroy time Previously, memory objects assumed that they remained attached to a process until they are destroyed. In the past this was mostly true, but worked by luck because a process could technically map the memory and then close the file descriptor which would eventually explode. Now we do the process related cleanup (MMU unmap, fixup statistics) when the object is released from the process so the process can go away without affecting the other holders of the mem object refcount. 
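Illustrative sketch only, not part of the applied diff: the lifecycle this change aims for separates "remove from the process" from "free the object". kgsl_mem_entry_get() is assumed here as the usual counterpart of the kgsl_mem_entry_put() shown below:

/*
 * process teardown                        another refcount holder
 * ----------------                        -----------------------
 * kgsl_mem_entry_detach_process(entry)    kgsl_mem_entry_get(entry)
 *   - subtract from process stats           ... keeps using entry->memdesc ...
 *   - kgsl_mmu_unmap(...)                  kgsl_mem_entry_put(entry)
 *   - entry->priv = NULL                     (the final put frees the object)
 *   - kgsl_mem_entry_put(entry)
 */
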
--- drivers/gpu/msm/kgsl.c | 21 ++++++++++++++++++--- drivers/gpu/msm/kgsl_mmu.c | 4 ++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 50fb5515..a954df73 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -199,6 +199,21 @@ void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, entry->priv = process; } +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; + entry->priv = NULL; + + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_put(entry); +} + /* Allocate a new context id */ static struct kgsl_context * @@ -597,7 +612,7 @@ kgsl_put_process_private(struct kgsl_device *device, list_for_each_entry_safe(entry, entry_tmp, &private->mem_list, list) { list_del(&entry->list); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } kgsl_mmu_putpagetable(private->pagetable); @@ -1022,7 +1037,7 @@ static void kgsl_freemem_event_cb(struct kgsl_device *device, spin_lock(&entry->priv->mem_lock); list_del(&entry->list); spin_unlock(&entry->priv->mem_lock); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private @@ -1116,7 +1131,7 @@ static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, spin_unlock(&private->mem_lock); if (entry) { - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } else { KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr); result = -EINVAL; diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c index 82f33c06..f03f4195 100755 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -593,6 +592,7 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, memdesc->size); + memdesc->gpuaddr = 0; return 0; } EXPORT_SYMBOL(kgsl_mmu_unmap); From 15793c0aaadcdffe02789e3418151b01d42cd5f0 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 19:54:21 +0800 Subject: [PATCH 063/155] msm: kgsl: Find a mem_entry by way of a GPU address and a pagetable base Given a pagetable base and a GPU address, find the struct kgsl_mem_entry that matches the object. Move this functionality out from inside another function and promote it to top level so it can be used by upcoming functionality. 
--- drivers/gpu/msm/adreno.c | 20 ++++---------------- drivers/gpu/msm/kgsl.c | 33 +++++++++++++++++++++++++++++++++ drivers/gpu/msm/kgsl.h | 6 +++++- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 56efea08..7b9343cb 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -904,7 +904,6 @@ struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, { struct kgsl_memdesc *result = NULL; struct kgsl_mem_entry *entry; - struct kgsl_process_private *priv; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer; struct kgsl_context *context; @@ -919,21 +918,10 @@ struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size)) return &device->memstore; - mutex_lock(&kgsl_driver.process_mutex); - list_for_each_entry(priv, &kgsl_driver.process_list, list) { - if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base)) - continue; - spin_lock(&priv->mem_lock); - entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); - if (entry) { - result = &entry->memdesc; - spin_unlock(&priv->mem_lock); - mutex_unlock(&kgsl_driver.process_mutex); - return result; - } - spin_unlock(&priv->mem_lock); - } - mutex_unlock(&kgsl_driver.process_mutex); + entry = kgsl_get_mem_entry(pt_base, gpuaddr, size); + + if (entry) + return &entry->memdesc; while (1) { struct adreno_context *adreno_context = NULL; diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index a954df73..8e620a46 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -135,6 +135,39 @@ static void kgsl_cancel_events(struct kgsl_device *device, } } +/* kgsl_get_mem_entry - get the mem_entry structure for the specified object + * @ptbase - the pagetable base of the object + * @gpuaddr - the GPU address of the object + * @size - Size of the region to search + */ + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_process_private *priv; + struct kgsl_mem_entry *entry; + + mutex_lock(&kgsl_driver.process_mutex); + + list_for_each_entry(priv, &kgsl_driver.process_list, list) { + if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase)) + continue; + spin_lock(&priv->mem_lock); + entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); + + if (entry) { + spin_unlock(&priv->mem_lock); + mutex_unlock(&kgsl_driver.process_mutex); + return entry; + } + spin_unlock(&priv->mem_lock); + } + mutex_unlock(&kgsl_driver.process_mutex); + + return NULL; +} +EXPORT_SYMBOL(kgsl_get_mem_entry); + static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) { diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 91b42ffe..fb506b57 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -157,6 +157,10 @@ struct kgsl_mem_entry { #endif void kgsl_mem_entry_destroy(struct kref *kref); + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size); + struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); From 503977ed6b744ff1f2c38ab904046f026d35bfc0 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 20:23:24 +0800 Subject: [PATCH 064/155] fix #4151332 --- drivers/gpu/msm/kgsl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 8e620a46..287b3ba0 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -188,8 +188,6 @@ kgsl_mem_entry_destroy(struct kref *kref) struct kgsl_mem_entry, refcount); - entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; - if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) kgsl_driver.stats.mapped -= entry->memdesc.size; From efa80a4cc16e6a869c8ed7f4e17cb7938c86a35d Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 20:27:26 +0800 Subject: [PATCH 065/155] msm: kgsl: Cleanup header file macros Remove macro logic for macros that are always defined. --- drivers/gpu/msm/adreno_ringbuffer.h | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index 9c048b14..d0110b9f 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -13,10 +13,6 @@ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H -#define GSL_RB_USE_MEM_RPTR -#define GSL_RB_USE_MEM_TIMESTAMP -#define GSL_DEVICE_SHADOW_MEMSTORE_TO_USER - /* * Adreno ringbuffer sizes in bytes - these are converted to * the appropriate log2 values in the code @@ -61,6 +57,7 @@ struct adreno_ringbuffer { uint32_t timestamp; }; + #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ *ring = data; \ @@ -70,37 +67,16 @@ struct adreno_ringbuffer { gpuaddr += sizeof(uint); \ } while (0) -/* timestamp */ -#ifdef GSL_DEVICE_SHADOW_MEMSTORE_TO_USER -#define GSL_RB_USE_MEM_TIMESTAMP -#endif /* GSL_DEVICE_SHADOW_MEMSTORE_TO_USER */ - -#ifdef GSL_RB_USE_MEM_TIMESTAMP /* enable timestamp (...scratch0) memory shadowing */ #define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 #define GSL_RB_INIT_TIMESTAMP(rb) -#else -#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x0 -#define GSL_RB_INIT_TIMESTAMP(rb) \ - adreno_regwrite((rb)->device->id, REG_CP_TIMESTAMP, 0) - -#endif /* GSL_RB_USE_MEMTIMESTAMP */ - /* mem rptr */ -#ifdef GSL_RB_USE_MEM_RPTR #define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */ #define GSL_RB_GET_READPTR(rb, data) \ do { \ *(data) = rb->memptrs->rptr; \ } while (0) -#else -#define GSL_RB_CNTL_NO_UPDATE 0x1 /* disable */ -#define GSL_RB_GET_READPTR(rb, data) \ - do { \ - adreno_regread((rb)->device->id, REG_CP_RB_RPTR, (data)); \ - } while (0) -#endif /* GSL_RB_USE_MEMRPTR */ #define GSL_RB_CNTL_POLL_EN 0x0 /* disable */ From 121a2a91a5379ea8964e4c5035fc069b0480195e Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 20:39:13 +0800 Subject: [PATCH 066/155] msm: kgsl: Add GMEM size configuration in gpu list To avoid msm or gpu specific code in the driver, added GMEM size configuration parameter as a part of gpu list. 
--- drivers/gpu/msm/adreno.c | 23 ++++++++++++++++------- drivers/gpu/msm/adreno.h | 2 ++ drivers/gpu/msm/adreno_a2xx.c | 5 ----- drivers/gpu/msm/adreno_a3xx.c | 3 --- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 7b9343cb..37a01eb0 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -140,33 +140,41 @@ static const struct { unsigned int istore_size; unsigned int pix_shader_start; unsigned int instruction_size; /* Size of an instruction in dwords */ + unsigned int gmem_size; /* size of gmem for gpu*/ } adreno_gpulist[] = { { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, - 512, 384, 3}, + 512, 384, 3, SZ_256K }, + { ADRENO_REV_A203, 0, 1, 1, ANY_ID, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A205, 0, 1, 0, ANY_ID, "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, - 512, 384, 3}, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev, - 512, 384, 3}, + 512, 384, 3, SZ_512K }, /* * patchlevel 5 (8960v2) needs special pm4 firmware to work around * a hardware problem. */ { ADRENO_REV_A225, 2, 2, 0, 5, "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768, 3 }, + 1536, 768, 3, SZ_512K }, { ADRENO_REV_A225, 2, 2, 0, 6, "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768, 3 }, + 1536, 768, 3, SZ_512K }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, - 1536, 768, 3 }, + 1536, 768, 3, SZ_512K }, + /* A3XX doesn't use the pix_shader_start */ + { ADRENO_REV_A305, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2, SZ_256K }, /* A3XX doesn't use the pix_shader_start */ { ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID, "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, - 512, 0, 2 }, + 512, 0, 2, SZ_512K }, }; @@ -416,6 +424,7 @@ adreno_identify_gpu(struct adreno_device *adreno_dev) adreno_dev->istore_size = adreno_gpulist[i].istore_size; adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start; adreno_dev->instruction_size = adreno_gpulist[i].instruction_size; + adreno_dev->gmemspace.sizebytes = adreno_gpulist[i].gmem_size; } static int __devinit diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 3bf9b3b5..dfb9018e 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -44,9 +44,11 @@ enum adreno_gpurev { ADRENO_REV_UNKNOWN = 0, ADRENO_REV_A200 = 200, + ADRENO_REV_A203 = 203, ADRENO_REV_A205 = 205, ADRENO_REV_A220 = 220, ADRENO_REV_A225 = 225, + ADRENO_REV_A305 = 305, ADRENO_REV_A320 = 320, }; diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index 8486aae2..cc611779 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1846,11 +1846,6 @@ static void a2xx_start(struct adreno_device *adreno_dev) adreno_regwrite(device, REG_CP_INT_CNTL, 0); adreno_regwrite(device, REG_SQ_INT_CNTL, 0); - if (adreno_is_a22x(adreno_dev)) - adreno_dev->gmemspace.sizebytes = SZ_512K; - else - adreno_dev->gmemspace.sizebytes = SZ_256K; - a2xx_gmeminit(adreno_dev); } diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c index 60e870f4..cbc7bed4 100755 --- a/drivers/gpu/msm/adreno_a3xx.c +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -2502,9 +2502,6 @@ static void a3xx_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; - 
/* GMEM size on A320 is 512K */ - adreno_dev->gmemspace.sizebytes = SZ_512K; - /* Reset the core */ adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, 0x00000001); From 41b9064ec231b1a3010405b0aacb9af0cc2b6314 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 21:08:59 +0800 Subject: [PATCH 067/155] msm: kgsl: don't clear gpuaddr when unmapping global mappings Memory mapped through kgsl_mmu_map_global() is supposed to have the same gpu address in all pagetables. And the memdesc will persist beyond the lifetime of any single pagetable. Therefore, memdesc->gpuaddr should not be zeroed for these memdescs. --- drivers/gpu/msm/kgsl_mmu.c | 6 ++++++ drivers/gpu/msm/kgsl_sharedmem.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c index f03f4195..d7585ef9 100755 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -592,6 +592,11 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, memdesc->size); + /* + * Don't clear the gpuaddr on global mappings because they + * may be in use by other pagetables + */ + if (!(memdesc->priv & KGSL_MEMFLAGS_GLOBAL)) memdesc->gpuaddr = 0; return 0; } @@ -624,6 +629,7 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, gpuaddr, memdesc->gpuaddr); goto error_unmap; } + memdesc->priv |= KGSL_MEMFLAGS_GLOBAL; return result; error_unmap: kgsl_mmu_unmap(pagetable, memdesc); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index 49694f90..16880b24 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -33,6 +33,8 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 +/** Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMFLAGS_GLOBAL 0x00000002 extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; From e2ff78936f9b383e25f06d76a1f6e3d9db2547ed Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 25 Jul 2012 21:10:10 +0800 Subject: [PATCH 068/155] msm: kgsl: Do not dereference pointer before checking against NULL The pagetable pointer was checked against NULL after being used. Check against NULL first and then dereference it. --- drivers/gpu/msm/kgsl_gpummu.c | 4 ++-- drivers/gpu/msm/kgsl_iommu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/kgsl_iommu.c diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c index 20f068e5..5d326658 100755 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -356,8 +356,8 @@ err_ptpool_remove: int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct kgsl_gpummu_pt *gpummu_pt = pt->priv; - return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); + struct kgsl_gpummu_pt *gpummu_pt = pt ? pt->priv : NULL; + return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); } void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c old mode 100644 new mode 100755 index 30365a3c..760cdb03 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -34,8 +34,8 @@ struct kgsl_iommu { static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct iommu_domain *domain = pt->priv; - return pt && pt_base && ((unsigned int)domain == pt_base); + struct iommu_domain *domain = pt ? 
pt->priv : NULL; + return domain && pt_base && ((unsigned int)domain == pt_base); } static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) From 4822aef009f19ed97671260f14507c17fda5cf1e Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 26 Jul 2012 13:52:28 +0800 Subject: [PATCH 069/155] msm: kgsl: Change name of vmalloc allocator Change the vmalloc allocation name to something more appropriate since we do not allocate memory using vmalloc for userspace driver. We directly allocate physical pages and map that to user address space. The name is changed to page_alloc instead of vmalloc. Add sysfs files to track memory usage via both vmalloc and page_alloc. --- drivers/gpu/msm/kgsl.c | 6 +- drivers/gpu/msm/kgsl.h | 2 + drivers/gpu/msm/kgsl_drm.c | 326 +++++++------------------------ drivers/gpu/msm/kgsl_sharedmem.c | 66 ++++--- drivers/gpu/msm/kgsl_sharedmem.h | 22 +-- 5 files changed, 122 insertions(+), 300 deletions(-) diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 287b3ba0..586f5c4f 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1242,7 +1242,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, goto error; } - result = kgsl_sharedmem_vmalloc_user(&entry->memdesc, + result = kgsl_sharedmem_page_alloc_user(&entry->memdesc, private->pagetable, len, param->flags); if (result != 0) @@ -1253,7 +1253,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, result = kgsl_sharedmem_map_vma(vma, &entry->memdesc); if (result) { KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result); - goto error_free_vmalloc; + goto error_free_alloc; } param->gpuaddr = entry->memdesc.gpuaddr; @@ -1268,7 +1268,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, kgsl_check_idle(dev_priv->device); return 0; -error_free_vmalloc: +error_free_alloc: kgsl_sharedmem_free(&entry->memdesc); error_free_entry: diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index fb506b57..ad6973ba 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -95,6 +95,8 @@ struct kgsl_driver { struct { unsigned int vmalloc; unsigned int vmalloc_max; + unsigned int page_alloc; + unsigned int page_alloc_max; unsigned int coherent; unsigned int coherent_max; unsigned int mapped; diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c index f8dd216b..66ac08f6 100755 --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,7 +17,6 @@ #include "drmP.h" #include "drm.h" #include -#include #include "kgsl.h" #include "kgsl_device.h" @@ -39,6 +38,9 @@ #define ENTRY_EMPTY -1 #define ENTRY_NEEDS_CLEANUP -2 +#define DRM_KGSL_NOT_INITED -1 +#define DRM_KGSL_INITED 1 + #define DRM_KGSL_NUM_FENCE_ENTRIES (DRM_KGSL_HANDLE_WAIT_ENTRIES << 2) #define DRM_KGSL_HANDLE_WAIT_ENTRIES 5 @@ -127,6 +129,8 @@ struct drm_kgsl_gem_object { struct list_head wait_list; }; +static int kgsl_drm_inited = DRM_KGSL_NOT_INITED; + /* This is a global list of all the memory currently mapped in the MMU */ static struct list_head kgsl_mem_list; @@ -152,22 +156,6 @@ static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op) kgsl_cache_range_op(memdesc, cacheop); } -/* Flush all the memory mapped in the MMU */ - -void kgsl_gpu_mem_flush(int op) -{ - struct drm_kgsl_gem_object *entry; - - list_for_each_entry(entry, &kgsl_mem_list, list) { - kgsl_gem_mem_flush(&entry->memdesc, entry->type, op); - } - - /* Takes care of WT/WC case. - * More useful when we go barrierless - */ - dmb(); -} - /* TODO: * Add vsync wait */ @@ -186,41 +174,6 @@ struct kgsl_drm_device_priv { struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX]; }; -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param); - -static struct notifier_block kgsl_ts_nb[KGSL_DEVICE_MAX]; - -static int kgsl_drm_firstopen(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - - if (device == NULL) - continue; - - kgsl_ts_nb[i].notifier_call = kgsl_ts_notifier_cb; - kgsl_register_ts_notifier(device, &kgsl_ts_nb[i]); - } - - return 0; -} - -void kgsl_drm_lastclose(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - if (device == NULL) - continue; - - kgsl_unregister_ts_notifier(device, &kgsl_ts_nb[i]); - } -} - void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv) { } @@ -268,80 +221,71 @@ kgsl_gem_alloc_memory(struct drm_gem_object *obj) { struct drm_kgsl_gem_object *priv = obj->driver_private; int index; + int result = 0; /* Return if the memory is already allocated */ if (kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type)) return 0; + if (priv->pagetable == NULL) { + priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + if (priv->pagetable == NULL) { + DRM_ERROR("Unable to get the GPU MMU pagetable\n"); + return -EINVAL; + } + } + if (TYPE_IS_PMEM(priv->type)) { int type; if (priv->type == DRM_KGSL_GEM_TYPE_EBI || - priv->type & DRM_KGSL_GEM_PMEM_EBI) - type = PMEM_MEMTYPE_EBI1; - else - type = PMEM_MEMTYPE_SMI; - - priv->memdesc.physaddr = - pmem_kalloc(obj->size * priv->bufcount, - type | PMEM_ALIGNMENT_4K); - - if (IS_ERR((void *) priv->memdesc.physaddr)) { - DRM_ERROR("Unable to allocate PMEM memory\n"); - return -ENOMEM; + priv->type & DRM_KGSL_GEM_PMEM_EBI) { + type = PMEM_MEMTYPE_EBI1; + result = kgsl_sharedmem_ebimem_user( + &priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, + 0); + if (result) { + DRM_ERROR( + "Unable to allocate PMEM memory\n"); + return result; + } } - - priv->memdesc.size = obj->size * priv->bufcount; + else + return -EINVAL; } else if (TYPE_IS_MEM(priv->type)) { - priv->memdesc.hostptr = - vmalloc_user(obj->size * priv->bufcount); - if (priv->memdesc.hostptr 
== NULL) { - DRM_ERROR("Unable to allocate vmalloc memory\n"); - return -ENOMEM; + if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || + priv->type & DRM_KGSL_GEM_CACHE_MASK) + list_add(&priv->list, &kgsl_mem_list); + + result = kgsl_sharedmem_page_alloc_user(&priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, 0); + + if (result != 0) { + DRM_ERROR( + "Unable to allocate Vmalloc user memory\n"); + return result; } - - priv->memdesc.size = obj->size * priv->bufcount; - priv->memdesc.ops = &kgsl_vmalloc_ops; } else return -EINVAL; - for (index = 0; index < priv->bufcount; index++) + for (index = 0; index < priv->bufcount; index++) { priv->bufs[index].offset = index * obj->size; - + priv->bufs[index].gpuaddr = + priv->memdesc.gpuaddr + + priv->bufs[index].offset; + } + priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - - if (!priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return; - - kgsl_mmu_unmap(priv->pagetable, &priv->memdesc); - - kgsl_mmu_putpagetable(priv->pagetable); - priv->pagetable = NULL; - - if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || - (priv->type & DRM_KGSL_GEM_CACHE_MASK)) - list_del(&priv->list); - - priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; -} -#else -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ -} -#endif - static void kgsl_gem_free_memory(struct drm_gem_object *obj) { @@ -353,12 +297,17 @@ kgsl_gem_free_memory(struct drm_gem_object *obj) kgsl_gem_mem_flush(&priv->memdesc, priv->type, DRM_KGSL_GEM_CACHE_OP_FROM_DEV); - kgsl_gem_unmap(obj); - - if (TYPE_IS_PMEM(priv->type)) - pmem_kfree(priv->memdesc.physaddr); - kgsl_sharedmem_free(&priv->memdesc); + + kgsl_mmu_putpagetable(priv->pagetable); + priv->pagetable = NULL; + + if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || + (priv->type & DRM_KGSL_GEM_CACHE_MASK)) + list_del(&priv->list); + + priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; + } int @@ -454,7 +403,7 @@ kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, filp = fget(drm_fd); if (unlikely(filp == NULL)) { - DRM_ERROR("Unable to ghet the DRM file descriptor\n"); + DRM_ERROR("Unable to get the DRM file descriptor\n"); return -EINVAL; } file_priv = filp->private_data; @@ -527,7 +476,7 @@ kgsl_gem_init_obj(struct drm_device *dev, ret = drm_gem_handle_create(file_priv, obj, handle); - drm_gem_object_handle_unreference(obj); + drm_gem_object_unreference(obj); INIT_LIST_HEAD(&priv->wait_list); for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) { @@ -702,128 +651,14 @@ int kgsl_gem_unbind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (--priv->bound == 0) - kgsl_gem_unmap(obj); - - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - int ret = -EINVAL; - - if (priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return 0; - - /* Get the global page table */ - - if (priv->pagetable == NULL) { - priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); - - if 
(priv->pagetable == NULL) { - DRM_ERROR("Unable to get the GPU MMU pagetable\n"); - return -EINVAL; - } - } - - priv->memdesc.pagetable = priv->pagetable; - - ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - - if (!ret) { - for (index = 0; index < priv->bufcount; index++) { - priv->bufs[index].gpuaddr = - priv->memdesc.gpuaddr + - priv->bufs[index].offset; - } - } - - /* Add cached memory to the list to be cached */ - - if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || - priv->type & DRM_KGSL_GEM_CACHE_MASK) - list_add(&priv->list, &kgsl_mem_list); - - priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; - - return ret; -} -#else -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - - if (TYPE_IS_PMEM(priv->type)) { - for (index = 0; index < priv->bufcount; index++) - priv->bufs[index].gpuaddr = - priv->memdesc.physaddr + priv->bufs[index].offset; - - return 0; - } - - return -EINVAL; -} -#endif - int kgsl_gem_bind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - int ret = 0; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (priv->bound++ == 0) { - - if (!kgsl_gem_memory_allocated(obj)) { - DRM_ERROR("Memory not allocated for this object\n"); - ret = -ENOMEM; - goto out; - } - - ret = kgsl_gem_map(obj); - - /* This is legacy behavior - use GET_BUFFERINFO instead */ - args->gpuptr = priv->bufs[0].gpuaddr; - } -out: - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; + return 0; } /* Allocate the memory and prepare it for CPU mapping */ @@ -1371,27 +1206,6 @@ wakeup_fence_entries(struct drm_kgsl_gem_object_fence *fence) fence->fence_id = ENTRY_NEEDS_CLEANUP; /* Mark it as needing cleanup */ } -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param) -{ - struct drm_kgsl_gem_object_fence *fence; - struct kgsl_device *device = kgsl_get_device(code); - int i; - - /* loop through the fences to see what things can be processed */ - - for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) { - fence = &gem_buf_fence[i]; - if (!fence->ts_valid || fence->ts_device != code) - continue; - - if (kgsl_check_timestamp(device, fence->timestamp)) - wakeup_fence_entries(fence); - } - - return 0; -} - int kgsl_gem_lock_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -1584,7 +1398,7 @@ kgsl_gem_unlock_on_ts_ioctl(struct drm_device *dev, void *data, } device = kgsl_get_device(ts_device); - ts_done = kgsl_check_timestamp(device, args->timestamp); + ts_done = kgsl_check_timestamp(device, NULL, args->timestamp); mutex_lock(&dev->struct_mutex); @@ -1635,11 +1449,9 @@ struct drm_ioctl_desc kgsl_drm_ioctls[] = { }; static struct drm_driver driver = { - .driver_features = DRIVER_USE_PLATFORM_DEVICE | DRIVER_GEM, + .driver_features = DRIVER_GEM, .load = kgsl_drm_load, .unload = kgsl_drm_unload, - .firstopen = kgsl_drm_firstopen, - .lastclose = kgsl_drm_lastclose, .preclose = kgsl_drm_preclose, .suspend = kgsl_drm_suspend, .resume = kgsl_drm_resume, @@ -1670,8 +1482,13 @@ int kgsl_drm_init(struct platform_device *dev) { int i; + /* Only initialize once */ + if (kgsl_drm_inited == DRM_KGSL_INITED) + return 0; 
+ + kgsl_drm_inited = DRM_KGSL_INITED; + driver.num_ioctls = DRM_ARRAY_SIZE(kgsl_drm_ioctls); - driver.platform_device = dev; INIT_LIST_HEAD(&kgsl_mem_list); @@ -1681,10 +1498,11 @@ int kgsl_drm_init(struct platform_device *dev) gem_buf_fence[i].fence_id = ENTRY_EMPTY; } - return drm_init(&driver); + return drm_platform_init(&driver, dev); } void kgsl_drm_exit(void) { - drm_exit(&driver); + kgsl_drm_inited = DRM_KGSL_NOT_INITED; + drm_platform_exit(&driver, driver.kdriver.platform_device); } diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 3e490c93..7d63949e 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -205,6 +204,10 @@ static int kgsl_drv_memstat_show(struct device *dev, val = kgsl_driver.stats.vmalloc; else if (!strncmp(attr->attr.name, "vmalloc_max", 11)) val = kgsl_driver.stats.vmalloc_max; + else if (!strncmp(attr->attr.name, "page_alloc", 10)) + val = kgsl_driver.stats.page_alloc; + else if (!strncmp(attr->attr.name, "page_alloc_max", 14)) + val = kgsl_driver.stats.page_alloc_max; else if (!strncmp(attr->attr.name, "coherent", 8)) val = kgsl_driver.stats.coherent; else if (!strncmp(attr->attr.name, "coherent_max", 12)) @@ -234,6 +237,8 @@ static int kgsl_drv_histogram_show(struct device *dev, DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); @@ -243,6 +248,8 @@ DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); static struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, &dev_attr_coherent, &dev_attr_coherent_max, &dev_attr_mapped, @@ -297,7 +304,7 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) } #endif -static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -318,18 +325,20 @@ static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, return 0; } -static int kgsl_vmalloc_vmflags(struct kgsl_memdesc *memdesc) +static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc) { return VM_RESERVED | VM_DONTEXPAND; } -static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) { int i = 0; struct scatterlist *sg; - kgsl_driver.stats.vmalloc -= memdesc->size; - if (memdesc->hostptr) + kgsl_driver.stats.page_alloc -= memdesc->size; + if (memdesc->hostptr) { vunmap(memdesc->hostptr); + kgsl_driver.stats.vmalloc -= memdesc->size; + } if (memdesc->sg) for_each_sg(memdesc->sg, sg, memdesc->sglen, i) __free_page(sg_page(sg)); @@ -341,13 +350,14 @@ static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) } /* - * kgsl_vmalloc_map_kernel - Map the memory in memdesc 
to kernel address space + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space * * @memdesc - The memory descriptor which contains information about the memory * * Return: 0 on success else error code */ -static int kgsl_vmalloc_map_kernel(struct kgsl_memdesc *memdesc) +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) { if (!memdesc->hostptr) { pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); @@ -365,6 +375,8 @@ static int kgsl_vmalloc_map_kernel(struct kgsl_memdesc *memdesc) pages[i] = sg_page(sg); memdesc->hostptr = vmap(pages, memdesc->sglen, VM_IOREMAP, page_prot); + KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, + kgsl_driver.stats.vmalloc_max); vfree(pages); } if (!memdesc->hostptr) @@ -412,13 +424,13 @@ static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) } /* Global - also used by kgsl_drm.c */ -struct kgsl_memdesc_ops kgsl_vmalloc_ops = { - .free = kgsl_vmalloc_free, - .vmflags = kgsl_vmalloc_vmflags, - .vmfault = kgsl_vmalloc_vmfault, - .map_kernel_mem = kgsl_vmalloc_map_kernel, +struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = kgsl_page_alloc_vmflags, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel_mem = kgsl_page_alloc_map_kernel, }; -EXPORT_SYMBOL(kgsl_vmalloc_ops); +EXPORT_SYMBOL(kgsl_page_alloc_ops); static struct kgsl_memdesc_ops kgsl_ebimem_ops = { .free = kgsl_ebimem_free, @@ -452,7 +464,7 @@ void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) EXPORT_SYMBOL(kgsl_cache_range_op); static int -_kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int protflags) { @@ -463,11 +475,13 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, memdesc->size = size; memdesc->pagetable = pagetable; memdesc->priv = KGSL_MEMFLAGS_CACHED; - memdesc->ops = &kgsl_vmalloc_ops; + memdesc->ops = &kgsl_page_alloc_ops; memdesc->sg = kgsl_sg_alloc(sglen); if (memdesc->sg == NULL) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct scatterlist)); ret = -ENOMEM; goto done; } @@ -496,8 +510,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, if (ret) goto done; - KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, - kgsl_driver.stats.vmalloc_max); + KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc, + kgsl_driver.stats.page_alloc_max); order = get_order(size); @@ -512,7 +526,7 @@ done: } int -kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { int ret = 0; @@ -520,18 +534,18 @@ kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, size = ALIGN(size, PAGE_SIZE * 2); - ret = _kgsl_sharedmem_vmalloc(memdesc, pagetable, size, + ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); if (!ret) - ret = kgsl_vmalloc_map_kernel(memdesc); + ret = kgsl_page_alloc_map_kernel(memdesc); if (ret) kgsl_sharedmem_free(memdesc); return ret; } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc); int -kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags) { @@ -543,10 +557,10 @@ kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, size, + 
return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, protflags); } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size) diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index 16880b24..49a2be24 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -36,12 +36,12 @@ struct kgsl_process_private; /** Set if the memdesc is mapped into all pagetables */ #define KGSL_MEMFLAGS_GLOBAL 0x00000002 -extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; +extern struct kgsl_memdesc_ops kgsl_page_alloc_ops; -int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size); -int kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags); @@ -136,11 +136,7 @@ static inline int kgsl_allocate(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc(memdesc, pagetable, size); -#else - return kgsl_sharedmem_ebimem(memdesc, pagetable, size); -#endif + return kgsl_sharedmem_page_alloc(memdesc, pagetable, size); } static inline int @@ -148,21 +144,13 @@ kgsl_allocate_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int flags) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc_user(memdesc, pagetable, size, flags); -#else - return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size, flags); -#endif + return kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size, flags); } static inline int kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) { int ret = kgsl_sharedmem_alloc_coherent(memdesc, size); -#ifndef CONFIG_MSM_KGSL_MMU - if (!ret) - memdesc->gpuaddr = memdesc->physaddr; -#endif return ret; } From 394bda433aba2f6eb086c267d40cef10b40eabd0 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 26 Jul 2012 14:04:25 +0800 Subject: [PATCH 070/155] msm: kgsl: Map a guard page on the back of GPU MMU regions Add a guard page on the backside of page_alloc MMU mappings to protect against an over zealous GPU pre-fetch engine that sometimes oversteps the end of the mapped region. The same phsyical page can be re-used for each mapping so we only need to allocate one phsyical page to rule them all and in the darkness bind them. 
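As a rough illustration of the sizing described above (a toy calculation only, assuming a 4 KB page size; the real logic is in the kgsl_sharedmem.c hunk below), the guard page costs one extra scatterlist entry per allocation when the IOMMU is in use, and every allocation points that last entry at the same single shared page:

#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long size = 1 * 1024 * 1024 + 100;  /* userspace asked for ~1 MB */
	unsigned long data_pages = PAGE_ALIGN(size) / PAGE_SIZE;
	int iommu = 1;                               /* guard page only with an IOMMU */

	unsigned long sglen = data_pages + (iommu ? 1 : 0);

	/* only data_pages entries get freshly allocated pages; the final
	 * entry would point at the single shared guard page */
	printf("data pages: %lu, sg entries: %lu\n", data_pages, sglen);
	return 0;
}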
--- drivers/gpu/msm/kgsl.h | 3 ++ drivers/gpu/msm/kgsl_sharedmem.c | 56 ++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index ad6973ba..3f9ff843 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -118,6 +118,8 @@ struct kgsl_memdesc_ops { int (*map_kernel_mem)(struct kgsl_memdesc *); }; +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) + /* shared memory allocation */ struct kgsl_memdesc { struct kgsl_pagetable *pagetable; @@ -129,6 +131,7 @@ struct kgsl_memdesc { struct scatterlist *sg; unsigned int sglen; struct kgsl_memdesc_ops *ops; + int flags; }; /* List of different memory entry types */ diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 7d63949e..7962cae3 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -64,6 +64,13 @@ struct mem_entry_stats { } +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; + /** * Given a kobj, find the process structure attached to it */ @@ -334,13 +341,20 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) { int i = 0; struct scatterlist *sg; + int sglen = memdesc->sglen; + + /* Don't free the guard page if it was used */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + kgsl_driver.stats.page_alloc -= memdesc->size; + if (memdesc->hostptr) { vunmap(memdesc->hostptr); kgsl_driver.stats.vmalloc -= memdesc->size; } if (memdesc->sg) - for_each_sg(memdesc->sg, sg, memdesc->sglen, i) + for_each_sg(memdesc->sg, sg, sglen, i) __free_page(sg_page(sg)); } @@ -363,17 +377,23 @@ static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); struct page **pages = NULL; struct scatterlist *sg; + int sglen = memdesc->sglen; int i; + + /* Don't map the guard page if it exists */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + /* create a list of pages to call vmap */ - pages = vmalloc(memdesc->sglen * sizeof(struct page *)); + pages = vmalloc(sglen * sizeof(struct page *)); if (!pages) { KGSL_CORE_ERR("vmalloc(%d) failed\n", - memdesc->sglen * sizeof(struct page *)); + sglen * sizeof(struct page *)); return -ENOMEM; } - for_each_sg(memdesc->sg, sg, memdesc->sglen, i) + for_each_sg(memdesc->sg, sg, sglen, i) pages[i] = sg_page(sg); - memdesc->hostptr = vmap(pages, memdesc->sglen, + memdesc->hostptr = vmap(pages, sglen, VM_IOREMAP, page_prot); KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, kgsl_driver.stats.vmalloc_max); @@ -472,6 +492,14 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; int i; + /* + * Add guard page to the end of the allocation when the + * IOMMU is in use. 
+ */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + sglen++; + memdesc->size = size; memdesc->pagetable = pagetable; memdesc->priv = KGSL_MEMFLAGS_CACHED; @@ -491,7 +519,7 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); - for (i = 0; i < memdesc->sglen; i++) { + for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) { struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM); if (!page) { @@ -502,6 +530,22 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, flush_dcache_page(page); sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); } + + /* ADd the guard page to the end of the sglist */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { + if (kgsl_guard_page == NULL) + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + + if (kgsl_guard_page != NULL) { + sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page, + PAGE_SIZE, 0); + memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE; + } else + memdesc->sglen--; + } + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, KGSL_CACHE_OP_FLUSH); From 1672c9446fd98ce29f8bf9d03885bef532c33fe2 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 26 Jul 2012 14:45:24 +0800 Subject: [PATCH 071/155] msm: kgsl: Optimize page_alloc allocations User memory needs to be zeroed out before it is sent to the user. To do this, the kernel maps the page, memsets it to zero and then unmaps it. By virtue of mapping it, this forces us to flush the dcache to ensure cache coherency between kernel and user mappings. Originally, the page_alloc loop was using GFP_ZERO (which does a map, memset, and unmap for each individual page) and then we were additionally calling flush_dcache_page() for each page killing us on performance. It is far more efficient, especially for large allocations (> 1MB), to allocate the pages without GFP_ZERO and then to vmap the entire allocation, memset it to zero, flush the cache and then unmap. This process is slightly slower for very small allocations, but only by a few microseconds, and is well within the margin of acceptability. In all, the new scheme is faster than the default for all sizes greater than 16k, and is almost 4X faster for 2MB and 4MB allocations which are common for textures and very large buffer objects. The downside is that if there isn't enough vmalloc room for the allocation that we are forced to fallback to a slow page by page memset/flush, but this should happen rarely (if at all) and is only included for completeness. --- drivers/gpu/msm/kgsl_sharedmem.c | 86 +++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index 7962cae3..b52fc1d5 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -10,11 +10,14 @@ * GNU General Public License for more details. 
* */ + +#include #include #include #include #include #include +#include #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -488,9 +491,11 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int protflags) { - int order, ret = 0; + int i, order, ret = 0; int sglen = PAGE_ALIGN(size) / PAGE_SIZE; - int i; + struct page **pages = NULL; + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + void *ptr; /* * Add guard page to the end of the allocation when the @@ -514,26 +519,53 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, goto done; } + /* + * Allocate space to store the list of pages to send to vmap. + * This is an array of pointers so we can track 1024 pages per page of + * allocation which means we can handle up to a 8MB buffer request with + * two pages; well within the acceptable limits for using kmalloc. + */ + + pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL); + + if (pages == NULL) { + KGSL_CORE_ERR("kmalloc (%d) failed\n", + sglen * sizeof(struct page *)); + ret = -ENOMEM; + goto done; + } + kmemleak_not_leak(memdesc->sg); memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO | - __GFP_HIGHMEM); - if (!page) { + + /* + * Don't use GFP_ZERO here because it is faster to memset the + * range ourselves (see below) + */ + + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (pages[i] == NULL) { ret = -ENOMEM; memdesc->sglen = i; goto done; } - flush_dcache_page(page); - sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); + + sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0); } /* ADd the guard page to the end of the sglist */ if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { + /* + * It doesn't matter if we use GFP_ZERO here, this never + * gets mapped, and we only allocate it once in the life + * of the system + */ + if (kgsl_guard_page == NULL) kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM); @@ -546,6 +578,44 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, memdesc->sglen--; } + /* + * All memory that goes to the user has to be zeroed out before it gets + * exposed to userspace. This means that the memory has to be mapped in + * the kernel, zeroed (memset) and then unmapped. This also means that + * the dcache has to be flushed to ensure coherency between the kernel + * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped + * zeroed and unmaped each individual page, and then we had to turn + * around and call flush_dcache_page() on that page to clear the caches. + * This was killing us for performance. Instead, we found it is much + * faster to allocate the pages without GFP_ZERO, map the entire range, + * memset it, flush the range and then unmap - this results in a factor + * of 4 improvement for speed for large buffers. There is a small + * increase in speed for small buffers, but only on the order of a few + * microseconds at best. The only downside is that there needs to be + * enough temporary space in vmalloc to accomodate the map. 
This + * shouldn't be a problem, but if it happens, fall back to a much slower + * path + */ + + ptr = vmap(pages, i, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, memdesc->size); + dmac_flush_range(ptr, ptr + memdesc->size); + vunmap(ptr); + } else { + int j; + + /* Very, very, very slow path */ + + for (j = 0; j < i; j++) { + ptr = kmap_atomic(pages[j],KM_BOUNCE_READ); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr,KM_BOUNCE_READ); + } + } + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, KGSL_CACHE_OP_FLUSH); @@ -563,6 +633,8 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, kgsl_driver.stats.histogram[order]++; done: + kfree(pages); + if (ret) kgsl_sharedmem_free(memdesc); From d842173fc69c8aa0ff153110362298195045765e Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 26 Jul 2012 15:37:36 +0800 Subject: [PATCH 072/155] msm: kgsl: Add markers to identify IB sequence start and end Add nop packets in ringbuffer at the start and end of IB buffers subnmitted by user space driver. These nop packets serve as markers that can be used during replay, recovery, and snapshot to get valid data for a GPU hang dump --- drivers/gpu/msm/adreno.h | 16 +++++++++++----- drivers/gpu/msm/adreno_ringbuffer.c | 22 ++++++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index dfb9018e..f6414ab3 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -30,8 +30,10 @@ #define KGSL_CMD_FLAGS_NOT_KERNEL_CMD 0x00000004 /* Command identifiers */ -#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0xDEADBEEF -#define KGSL_CMD_IDENTIFIER 0xFEEDFACE +#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define KGSL_CMD_IDENTIFIER 0x2EEDFACE +#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE +#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD #ifdef CONFIG_MSM_SCM #define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) @@ -117,15 +119,19 @@ static inline int adreno_is_a200(struct adreno_device *adreno_dev) return (adreno_dev->gpurev == ADRENO_REV_A200); } +static inline int adreno_is_a203(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev == ADRENO_REV_A203); +} + static inline int adreno_is_a205(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200); + return (adreno_dev->gpurev == ADRENO_REV_A205); } static inline int adreno_is_a20x(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200 || - adreno_dev->gpurev == ADRENO_REV_A205); + return (adreno_dev->gpurev <= 209); } static inline int adreno_is_a220(struct adreno_device *adreno_dev) diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 8e47927f..6dd1336a 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -580,11 +580,12 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, drawctxt); return -EDEADLK; } - link = kzalloc(sizeof(unsigned int) * numibs * 3, GFP_KERNEL); - cmds = link; + + cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4), + GFP_KERNEL); if (!link) { - KGSL_MEM_ERR(device, "Failed to allocate memory for for command" - " submission, size %x\n", numibs * 3); + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(unsigned int) * (numibs * 3 + 4)); return -ENOMEM; } @@ -596,6 +597,16 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, adreno_dev->drawctxt_active == drawctxt) start_index = 1; + if 
(!start_index) { + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + } else { + *cmds++ = cp_nop_packet(4); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; + *cmds++ = ibdesc[0].gpuaddr; + *cmds++ = ibdesc[0].sizedwords; + } for (i = start_index; i < numibs; i++) { (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); @@ -605,6 +616,9 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, *cmds++ = ibdesc[i].sizedwords; } + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + kgsl_setstate(device, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); From c6e8ee54ffb0dec33d0118fdcf5673c288d39cd3 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 26 Jul 2012 16:14:35 +0800 Subject: [PATCH 073/155] msm: kgsl: rework ib checking Separate ib parse checking from cffdump as it is useful in other situations. This is controlled by a new debugfs file, ib_check. All ib checking is off (0) by default, because parsing and mem_entry lookup can have a performance impact on some benchmarks. Level 1 checking verifies the IB1's. Level 2 checking also verifies the IB2. --- drivers/gpu/msm/adreno.c | 2 + drivers/gpu/msm/adreno.h | 1 + drivers/gpu/msm/adreno_debugfs.c | 2 + drivers/gpu/msm/adreno_pm4types.h | 19 ++- drivers/gpu/msm/adreno_postmortem.c | 2 +- drivers/gpu/msm/adreno_ringbuffer.c | 216 +++++++++++++++++++++++++--- drivers/gpu/msm/kgsl.c | 51 ------- drivers/gpu/msm/kgsl_cffdump.c | 189 ------------------------ 8 files changed, 218 insertions(+), 264 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 37a01eb0..4020fefd 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -122,8 +122,10 @@ static struct adreno_device device_3d0 = { .pfp_fw = NULL, .pm4_fw = NULL, .wait_timeout = 10000, /* in milliseconds */ + .ib_check_level = 0, }; + /* * This is the master list of all GPU cores that are supported by this * driver. diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index f6414ab3..2a57203b 100755 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -75,6 +75,7 @@ struct adreno_device { unsigned int istore_size; unsigned int pix_shader_start; unsigned int instruction_size; + unsigned int ib_check_level; }; struct adreno_gpudev { diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 9a136699..9c9ee02c 100755 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -444,6 +444,8 @@ void adreno_debugfs_init(struct kgsl_device *device) &kgsl_cff_dump_enable_fops); debugfs_create_u32("wait_timeout", 0644, device->d_debugfs, &adreno_dev->wait_timeout); + debugfs_create_u32("ib_check", 0644, device->d_debugfs, + &adreno_dev->ib_check_level); /* Create post mortem control files */ diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h index 75512d05..9340f691 100755 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -29,11 +29,6 @@ /* skip N 32-bit words to get to the next packet */ #define CP_NOP 0x10 -/* indirect buffer dispatch. prefetch parser uses this packet type to determine -* whether to pre-fetch the IB -*/ -#define CP_INDIRECT_BUFFER 0x3f - /* indirect buffer dispatch. 
same as IB, but init is pipelined */ #define CP_INDIRECT_BUFFER_PFD 0x37 @@ -117,6 +112,9 @@ /* load constants from a location in memory */ #define CP_LOAD_CONSTANT_CONTEXT 0x2e +/* (A2x) sets binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + /* selective invalidation of state pointers */ #define CP_INVALIDATE_STATE 0x3b @@ -197,11 +195,20 @@ #define cp_nop_packet(cnt) \ (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8)) +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +#define pkt_is_type3(pkt) (((pkt) & 0xC0000000) == CP_TYPE3_PKT) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) /* packet headers */ #define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18) #define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) -#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2) +#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) /* dword base address of the GFX decode space */ #define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c index 2980e303..7e073fd9 100755 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -52,7 +52,7 @@ static const struct pm_id_name pm3_types[] = { {CP_IM_LOAD, "IN__LOAD"}, {CP_IM_LOAD_IMMEDIATE, "IM_LOADI"}, {CP_IM_STORE, "IM_STORE"}, - {CP_INDIRECT_BUFFER, "IND_BUF_"}, + {CP_INDIRECT_BUFFER_PFE, "IND_BUF_"}, {CP_INDIRECT_BUFFER_PFD, "IND_BUFP"}, {CP_INTERRUPT, "PM4_INTR"}, {CP_INVALIDATE_STATE, "INV_STAT"}, diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 6dd1336a..71f239cc 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -22,6 +22,7 @@ #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_ringbuffer.h" +#include "adreno_debugfs.h" #include "a2xx_reg.h" #include "a3xx_reg.h" @@ -550,6 +551,198 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords); } +static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, + int sizedwords); + +static bool +_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int opcode = cp_type3_opcode(*hostaddr); + switch (opcode) { + case CP_INDIRECT_BUFFER_PFD: + case CP_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFD: + return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]); + case CP_NOP: + case CP_WAIT_FOR_IDLE: + case CP_WAIT_REG_MEM: + case CP_WAIT_REG_EQ: + case CP_WAT_REG_GTE: + case CP_WAIT_UNTIL_READ: + case CP_WAIT_IB_PFD_COMPLETE: + case CP_REG_RMW: + case CP_REG_TO_MEM: + case CP_MEM_WRITE: + case CP_MEM_WRITE_CNTR: + case CP_COND_EXEC: + case CP_COND_WRITE: + case CP_EVENT_WRITE: + case CP_EVENT_WRITE_SHD: + case CP_EVENT_WRITE_CFL: + case CP_EVENT_WRITE_ZPD: + case CP_DRAW_INDX: + case CP_DRAW_INDX_2: + case CP_DRAW_INDX_BIN: + case CP_DRAW_INDX_2_BIN: + case CP_VIZ_QUERY: + case CP_SET_STATE: + case CP_SET_CONSTANT: + case CP_IM_LOAD: + case CP_IM_LOAD_IMMEDIATE: + case CP_LOAD_CONSTANT_CONTEXT: + case CP_INVALIDATE_STATE: + case CP_SET_SHADER_BASES: + case CP_SET_BIN_MASK: + case CP_SET_BIN_SELECT: + case CP_SET_BIN_BASE_OFFSET: + case CP_SET_BIN_DATA: + case CP_CONTEXT_UPDATE: + case 
CP_INTERRUPT: + case CP_IM_STORE: + case CP_LOAD_STATE: + break; + /* these shouldn't come from userspace */ + case CP_ME_INIT: + case CP_SET_PROTECTED_MODE: + default: + KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode); + return false; + break; + } + + return true; +} + +static bool +_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int reg = type0_pkt_offset(*hostaddr); + unsigned int cnt = type0_pkt_size(*hostaddr); + if (reg < 0x0192 || (reg + cnt) >= 0x8000) { + KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n", + reg, cnt); + return false; + } + return true; +} + +/* + * Traverse IBs and dump them to test vector. Detect swap by inspecting + * register writes, keeping note of the current state, and dump + * framebuffer config to test vector + */ +static bool _parse_ibs(struct kgsl_device_private *dev_priv, + uint gpuaddr, int sizedwords) +{ + static uint level; /* recursion level */ + bool ret = false; + uint *hostaddr, *hoststart; + int dwords_left = sizedwords; /* dwords left in the current command + buffer */ + struct kgsl_mem_entry *entry; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find_region(dev_priv->process_priv, + gpuaddr, sizedwords * sizeof(uint)); + spin_unlock(&dev_priv->process_priv->mem_lock); + if (entry == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (hostaddr == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hoststart = hostaddr; + + level++; + + KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", + gpuaddr, sizedwords, hostaddr); + + mb(); + while (dwords_left > 0) { + bool cur_ret = true; + int count = 0; /* dword count including packet header */ + + switch (*hostaddr >> 30) { + case 0x0: /* type-0 */ + count = (*hostaddr >> 16)+2; + cur_ret = _handle_type0(dev_priv, hostaddr); + break; + case 0x1: /* type-1 */ + count = 2; + break; + case 0x3: /* type-3 */ + count = ((*hostaddr >> 16) & 0x3fff) + 2; + cur_ret = _handle_type3(dev_priv, hostaddr); + break; + default: + KGSL_CMD_ERR(dev_priv->device, "unexpected type: " + "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", + *hostaddr >> 30, *hostaddr, hostaddr, + gpuaddr+4*(sizedwords-dwords_left)); + cur_ret = false; + count = dwords_left; + break; + } + + if (!cur_ret) { + KGSL_CMD_ERR(dev_priv->device, + "bad sub-type: #:%d/%d, v:0x%08x" + " @ 0x%p[gb:0x%08x], level:%d\n", + sizedwords-dwords_left, sizedwords, *hostaddr, + hostaddr, gpuaddr+4*(sizedwords-dwords_left), + level); + + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + + /* jump to next packet */ + dwords_left -= count; + hostaddr += count; + if (dwords_left < 0) { + KGSL_CMD_ERR(dev_priv->device, + "bad count: c:%d, #:%d/%d, " + "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", + count, sizedwords-(dwords_left+count), + sizedwords, *(hostaddr-count), hostaddr-count, + gpuaddr+4*(sizedwords-(dwords_left+count)), + level); + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? 
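
/*
 * Note on the arithmetic above (illustrative): the walker always advances
 * by the full packet length including the header dword, so
 *
 *   type-0: count = (pkt >> 16) + 2
 *   type-3: count = ((pkt >> 16) & 0x3fff) + 2
 *
 * (the size field stores length - 1, and the header dword itself adds one
 * more). For example the cp_nop_packet(1) header used for the IB markers
 * has a size field of 0 and consumes two dwords: the header plus one
 * payload dword. _handle_type0() additionally rejects raw register writes
 * that start below 0x0192 or run past 0x8000, so a stray type-0 packet
 * cannot reach the low, protected register range.
 */
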
"IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + } + + ret = true; +done: + if (!ret) + KGSL_DRV_ERR(dev_priv->device, + "parsing failed: gpuaddr:0x%08x, " + "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); + + level--; + + return ret; +} + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, @@ -608,9 +801,12 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, *cmds++ = ibdesc[0].sizedwords; } for (i = start_index; i < numibs; i++) { - (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); - + if (unlikely(adreno_dev->ib_check_level >= 1 && + !_parse_ibs(dev_priv, ibdesc[i].gpuaddr, + ibdesc[i].sizedwords))) { + kfree(link); + return -EINVAL; + } *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; @@ -660,7 +856,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, unsigned int val3; unsigned int copy_rb_contents = 0; unsigned int cur_context; - unsigned int j; GSL_RB_GET_READPTR(rb, &rb->rptr); @@ -811,19 +1006,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, } *rb_size = temp_idx; - KGSL_DRV_ERR(device, "Extracted rb contents, size: %x\n", *rb_size); - for (temp_idx = 0; temp_idx < *rb_size;) { - char str[80]; - int idx = 0; - if ((temp_idx + 8) <= *rb_size) - j = 8; - else - j = *rb_size - temp_idx; - for (; j != 0; j--) - idx += scnprintf(str + idx, 80 - idx, - "%8.8X ", temp_rb_buffer[temp_idx++]); - printk(KERN_ALERT "%s", str); - } return 0; } diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index 586f5c4f..e7c4ff8b 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -909,40 +909,6 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private return result; } -static bool check_ibdesc(struct kgsl_device_private *dev_priv, - struct kgsl_ibdesc *ibdesc, unsigned int numibs, - bool parse) -{ - bool result = true; - unsigned int i; - for (i = 0; i < numibs; i++) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d\n", ibdesc[i].gpuaddr, - ibdesc[i].sizedwords); - result = false; - break; - } - - if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d numibs %d/%d\n", - ibdesc[i].gpuaddr, - ibdesc[i].sizedwords, i+1, numibs); - result = false; - break; - } - } - return result; -} static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -1012,12 +978,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, param->numibs = 1; } - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc"); - result = -EINVAL; - goto free_ibdesc; - } - result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, ibdesc, @@ -1025,17 +985,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, ¶m->timestamp, param->flags); - if (result != 0) - goto free_ibdesc; - - /* this is a check to try to detect if a command buffer was freed - * 
during issueibcmds(). - */ - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue"); - result = -EINVAL; - goto free_ibdesc; - } free_ibdesc: kfree(ibdesc); diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c index 945b535d..1ab8908f 100755 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -497,195 +497,6 @@ int kgsl_cffdump_waitirq(void) } EXPORT_SYMBOL(kgsl_cffdump_waitirq); -#define ADDRESS_STACK_SIZE 256 -#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF) -static unsigned int kgsl_cffdump_addr_count; - -static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv, - uint *hostaddr, bool check_only) -{ - static uint addr_stack[ADDRESS_STACK_SIZE]; - static uint size_stack[ADDRESS_STACK_SIZE]; - - switch (GET_PM4_TYPE3_OPCODE(hostaddr)) { - case CP_INDIRECT_BUFFER_PFD: - case CP_INDIRECT_BUFFER: - { - /* traverse indirect buffers */ - int i; - uint ibaddr = hostaddr[1]; - uint ibsize = hostaddr[2]; - - /* is this address already in encountered? */ - for (i = 0; - i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr; - ++i) - ; - - if (kgsl_cffdump_addr_count == i) { - addr_stack[kgsl_cffdump_addr_count] = ibaddr; - size_stack[kgsl_cffdump_addr_count++] = ibsize; - - if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) { - KGSL_CORE_ERR("stack overflow\n"); - return false; - } - - return kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibaddr, ibsize, check_only); - } else if (size_stack[i] != ibsize) { - KGSL_CORE_ERR("gpuaddr: 0x%08x, " - "wc: %u, with size wc: %u already on the " - "stack\n", ibaddr, ibsize, size_stack[i]); - return false; - } - } - break; - } - - return true; -} - -/* - * Traverse IBs and dump them to test vector. 
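
/*
 * Net effect (illustrative): submit-time IB validation now lives entirely
 * in the ringbuffer layer. The old check_ibdesc()/cffdump path above,
 * including the re-check after submission, is gone; when ib_check is
 * enabled and an IB fails _parse_ibs(), the error is reported straight
 * back through the issueibcmds ioctl, roughly as seen from userspace:
 *
 *   ret = ioctl(fd, IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, &param);
 *   // ret == -1 with errno == EINVAL if a malformed IB was rejected
 *
 * With ib_check at 0 (the default) no parsing happens at all, which is the
 * performance-motivated behaviour described in the commit message.
 */
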
Detect swap by inspecting - * register writes, keeping note of the current state, and dump - * framebuffer config to test vector - */ -bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, - const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords, - bool check_only) -{ - static uint level; /* recursion level */ - bool ret = true; - uint *hostaddr, *hoststart; - int dwords_left = sizedwords; /* dwords left in the current command - buffer */ - - if (level == 0) - kgsl_cffdump_addr_count = 0; - - if (memdesc == NULL) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - gpuaddr, sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_CORE_ERR("did not find mapping " - "for gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - memdesc = &entry->memdesc; - } - hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); - if (hostaddr == NULL) { - KGSL_CORE_ERR("no kernel mapping for " - "gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - - hoststart = hostaddr; - - level++; - - if (!memdesc->physaddr) { - KGSL_CORE_ERR("no physaddr"); - } else { - mb(); - kgsl_cache_range_op((struct kgsl_memdesc *)memdesc, - KGSL_CACHE_OP_INV); - } - -#ifdef DEBUG - pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", - gpuaddr, sizedwords, hostaddr); -#endif - - while (dwords_left > 0) { - int count = 0; /* dword count including packet header */ - bool cur_ret = true; - - switch (*hostaddr >> 30) { - case 0x0: /* type-0 */ - count = (*hostaddr >> 16)+2; - break; - case 0x1: /* type-1 */ - count = 2; - break; - case 0x3: /* type-3 */ - count = ((*hostaddr >> 16) & 0x3fff) + 2; - cur_ret = kgsl_cffdump_handle_type3(dev_priv, - hostaddr, check_only); - break; - default: - pr_warn("kgsl: cffdump: parse-ib: unexpected type: " - "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", - *hostaddr >> 30, *hostaddr, hostaddr, - gpuaddr+4*(sizedwords-dwords_left)); - cur_ret = false; - count = dwords_left; - break; - } - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x" - " @ 0x%p[gb:0x%08x], level:%d\n", - sizedwords-dwords_left, sizedwords, *hostaddr, - hostaddr, gpuaddr+4*(sizedwords-dwords_left), - level); - - print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - ret = ret && cur_ret; - - /* jump to next packet */ - dwords_left -= count; - hostaddr += count; - cur_ret = dwords_left >= 0; - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, " - "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", - count, sizedwords-(dwords_left+count), - sizedwords, *(hostaddr-count), hostaddr-count, - gpuaddr+4*(sizedwords-(dwords_left+count)), - level); - - print_hex_dump(KERN_ERR, level == 1 ? 
"IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - - ret = ret && cur_ret; - } - - if (!ret) - pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, " - "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); - - if (!check_only) { -#ifdef DEBUG - uint offset = gpuaddr - memdesc->gpuaddr; - pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, " - "physaddr:%08x, offset:%d, size:%d", hoststart, - gpuaddr, memdesc->physaddr + offset, offset, - sizedwords*4); -#endif - kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4, - false); - } - - level--; - - return ret; -} - static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, uint prev_padding) { From 05369deec98ccb9fc7996d6b2b92f0476fcc83af Mon Sep 17 00:00:00 2001 From: securecrt Date: Tue, 31 Jul 2012 13:59:12 +0800 Subject: [PATCH 074/155] include: checkout msm_mdp from ics_chocolate *needs to match userspace* --- include/linux/msm_mdp.h | 442 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 430 insertions(+), 12 deletions(-) diff --git a/include/linux/msm_mdp.h b/include/linux/msm_mdp.h index a933facc..dcedc554 100644 --- a/include/linux/msm_mdp.h +++ b/include/linux/msm_mdp.h @@ -1,6 +1,7 @@ /* include/linux/msm_mdp.h * * Copyright (C) 2007 Google Incorporated + * Copyright (c) 2012 Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -15,25 +16,90 @@ #define _MSM_MDP_H_ #include +#include #define MSMFB_IOCTL_MAGIC 'm' #define MSMFB_GRP_DISP _IOW(MSMFB_IOCTL_MAGIC, 1, unsigned int) #define MSMFB_BLIT _IOW(MSMFB_IOCTL_MAGIC, 2, unsigned int) +#define MSMFB_SUSPEND_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 128, unsigned int) +#define MSMFB_RESUME_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 129, unsigned int) +#define MSMFB_CURSOR _IOW(MSMFB_IOCTL_MAGIC, 130, struct fb_cursor) +#define MSMFB_SET_LUT _IOW(MSMFB_IOCTL_MAGIC, 131, struct fb_cmap) +#define MSMFB_HISTOGRAM _IOWR(MSMFB_IOCTL_MAGIC, 132, struct mdp_histogram_data) +/* new ioctls's for set/get ccs matrix */ +#define MSMFB_GET_CCS_MATRIX _IOWR(MSMFB_IOCTL_MAGIC, 133, struct mdp_ccs) +#define MSMFB_SET_CCS_MATRIX _IOW(MSMFB_IOCTL_MAGIC, 134, struct mdp_ccs) +#define MSMFB_OVERLAY_SET _IOWR(MSMFB_IOCTL_MAGIC, 135, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_UNSET _IOW(MSMFB_IOCTL_MAGIC, 136, unsigned int) +#define MSMFB_OVERLAY_PLAY _IOW(MSMFB_IOCTL_MAGIC, 137, \ + struct msmfb_overlay_data) +#define MSMFB_GET_PAGE_PROTECTION _IOR(MSMFB_IOCTL_MAGIC, 138, \ + struct mdp_page_protection) +#define MSMFB_SET_PAGE_PROTECTION _IOW(MSMFB_IOCTL_MAGIC, 139, \ + struct mdp_page_protection) +#define MSMFB_OVERLAY_GET _IOR(MSMFB_IOCTL_MAGIC, 140, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_PLAY_ENABLE _IOW(MSMFB_IOCTL_MAGIC, 141, unsigned int) +#define MSMFB_OVERLAY_BLT _IOWR(MSMFB_IOCTL_MAGIC, 142, \ + struct msmfb_overlay_blt) +#define MSMFB_OVERLAY_BLT_OFFSET _IOW(MSMFB_IOCTL_MAGIC, 143, unsigned int) +#define MSMFB_HISTOGRAM_START _IOR(MSMFB_IOCTL_MAGIC, 144, \ + struct mdp_histogram_start_req) +#define MSMFB_HISTOGRAM_STOP _IOR(MSMFB_IOCTL_MAGIC, 145, unsigned int) +#define MSMFB_NOTIFY_UPDATE _IOW(MSMFB_IOCTL_MAGIC, 146, unsigned int) + +#define MSMFB_OVERLAY_3D _IOWR(MSMFB_IOCTL_MAGIC, 147, \ + struct msmfb_overlay_3d) + +#define MSMFB_MIXER_INFO _IOWR(MSMFB_IOCTL_MAGIC, 148, \ + struct msmfb_mixer_info_req) +#define MSMFB_OVERLAY_PLAY_WAIT _IOWR(MSMFB_IOCTL_MAGIC, 149, \ 
+ struct msmfb_overlay_data) +#define MSMFB_WRITEBACK_INIT _IO(MSMFB_IOCTL_MAGIC, 150) +#define MSMFB_WRITEBACK_START _IO(MSMFB_IOCTL_MAGIC, 151) +#define MSMFB_WRITEBACK_STOP _IO(MSMFB_IOCTL_MAGIC, 152) +#define MSMFB_WRITEBACK_QUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 153, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_DEQUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 154, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_TERMINATE _IO(MSMFB_IOCTL_MAGIC, 155) +#define MSMFB_MDP_PP _IOWR(MSMFB_IOCTL_MAGIC, 156, struct msmfb_mdp_pp) + +#define FB_TYPE_3D_PANEL 0x10101010 +#define MDP_IMGTYPE2_START 0x10000 +#define MSMFB_DRIVER_VERSION 0xF9E8D701 enum { - MDP_RGB_565, /* RGB 565 planar */ + NOTIFY_UPDATE_START, + NOTIFY_UPDATE_STOP, +}; + +enum { + MDP_RGB_565, /* RGB 565 planer */ MDP_XRGB_8888, /* RGB 888 padded */ - MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planar w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planer w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2_ADRENO, MDP_ARGB_8888, /* ARGB 888 */ - MDP_RGB_888, /* RGB 888 planar */ - MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planar w/ Cr is in MSB */ + MDP_RGB_888, /* RGB 888 planer */ + MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planer w/ Cr is in MSB */ MDP_YCRYCB_H2V1, /* YCrYCb interleave */ - MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ + MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ MDP_RGBA_8888, /* ARGB 888 */ MDP_BGRA_8888, /* ABGR 888 */ MDP_RGBX_8888, /* RGBX 888 */ - MDP_IMGTYPE_LIMIT /* Non valid image type after this enum */ + MDP_Y_CRCB_H2V2_TILE, /* Y and CrCb, pseudo planer tile */ + MDP_Y_CBCR_H2V2_TILE, /* Y and CbCr, pseudo planer tile */ + MDP_Y_CR_CB_H2V2, /* Y, Cr and Cb, planar */ + MDP_Y_CR_CB_GH2V2, /* Y, Cr and Cb, planar aligned to Android YV12 */ + MDP_Y_CB_CR_H2V2, /* Y, Cb and Cr, planar */ + MDP_Y_CRCB_H1V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H1V1, /* Y and CbCr, pseduo planer w/ Cb is in MSB */ + MDP_IMGTYPE_LIMIT, + MDP_BGR_565 = MDP_IMGTYPE2_START, /* BGR 565 planer */ + MDP_FB_FORMAT, /* framebuffer format */ + MDP_IMGTYPE_LIMIT2 /* Non valid image type after this enum */ }; enum { @@ -41,24 +107,57 @@ enum { FB_IMG, }; -/* flag values */ +enum { + HSIC_HUE = 0, + HSIC_SAT, + HSIC_INT, + HSIC_CON, + NUM_HSIC_PARAM, +}; + +/* mdp_blit_req flag values */ #define MDP_ROT_NOP 0 #define MDP_FLIP_LR 0x1 #define MDP_FLIP_UD 0x2 #define MDP_ROT_90 0x4 #define MDP_ROT_180 (MDP_FLIP_UD|MDP_FLIP_LR) #define MDP_ROT_270 (MDP_ROT_90|MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_ROT_MASK 0x7 #define MDP_DITHER 0x8 #define MDP_BLUR 0x10 #define MDP_BLEND_FG_PREMULT 0x20000 +#define MDP_DEINTERLACE 0x80000000 +#define MDP_SHARPENING 0x40000000 +#define MDP_NO_DMA_BARRIER_START 0x20000000 +#define MDP_NO_DMA_BARRIER_END 0x10000000 +#define MDP_NO_BLIT 0x08000000 +#define MDP_BLIT_WITH_DMA_BARRIERS 0x000 +#define MDP_BLIT_WITH_NO_DMA_BARRIERS \ + (MDP_NO_DMA_BARRIER_START | MDP_NO_DMA_BARRIER_END) +#define MDP_BLIT_SRC_GEM 0x04000000 +#define MDP_BLIT_DST_GEM 0x02000000 +#define MDP_BLIT_NON_CACHED 0x01000000 +#define MDP_OV_PIPE_SHARE 0x00800000 +#define MDP_DEINTERLACE_ODD 0x00400000 +#define MDP_OV_PLAY_NOWAIT 0x00200000 +#define MDP_SOURCE_ROTATED_90 0x00100000 +#define MDP_DPP_HSIC 0x00080000 +#define MDP_BACKEND_COMPOSITION 0x00040000 +#define MDP_BORDERFILL_SUPPORTED 0x00010000 +#define MDP_SECURE_OVERLAY_SESSION 0x00008000 +#define 
MDP_MEMORY_ID_TYPE_FB 0x00001000 #define MDP_TRANSP_NOP 0xffffffff #define MDP_ALPHA_NOP 0xff -/* drewis: added for android 4.0 */ -#define MDP_BLIT_NON_CACHED 0x01000000 -/* drewis: end */ +#define MDP_FB_PAGE_PROTECTION_NONCACHED (0) +#define MDP_FB_PAGE_PROTECTION_WRITECOMBINE (1) +#define MDP_FB_PAGE_PROTECTION_WRITETHROUGHCACHE (2) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKCACHE (3) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKWACACHE (4) +/* Sentinel: Don't use! */ +#define MDP_FB_PAGE_PROTECTION_INVALID (5) +/* Count of the number of MDP_FB_PAGE_PROTECTION_... values. */ +#define MDP_NUM_FB_PAGE_PROTECTION_VALUES (5) struct mdp_rect { uint32_t x; @@ -73,8 +172,41 @@ struct mdp_img { uint32_t format; uint32_t offset; int memory_id; /* the file descriptor */ + uint32_t priv; }; +/* + * {3x3} + {3} ccs matrix + */ + +#define MDP_CCS_RGB2YUV 0 +#define MDP_CCS_YUV2RGB 1 + +#define MDP_CCS_SIZE 9 +#define MDP_BV_SIZE 3 + +struct mdp_ccs { + int direction; /* MDP_CCS_RGB2YUV or YUV2RGB */ + uint16_t ccs[MDP_CCS_SIZE]; /* 3x3 color coefficients */ + uint16_t bv[MDP_BV_SIZE]; /* 1x3 bias vector */ +}; + +struct mdp_csc { + int id; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +/* The version of the mdp_blit_req structure so that + * user applications can selectively decide which functionality + * to include + */ + +#define MDP_BLIT_REQ_VERSION 2 + struct mdp_blit_req { struct mdp_img src; struct mdp_img dst; @@ -83,6 +215,7 @@ struct mdp_blit_req { uint32_t alpha; uint32_t transp_mask; uint32_t flags; + int sharpening_strength; /* -127 <--> 127, default 64 */ }; struct mdp_blit_req_list { @@ -90,4 +223,289 @@ struct mdp_blit_req_list { struct mdp_blit_req req[]; }; +#define MSMFB_DATA_VERSION 2 + +struct msmfb_data { + uint32_t offset; + int memory_id; + int id; + uint32_t flags; + uint32_t priv; + uint32_t iova; +}; + +#define MSMFB_NEW_REQUEST -1 + +struct msmfb_overlay_data { + uint32_t id; + struct msmfb_data data; + uint32_t version_key; + struct msmfb_data plane1_data; + struct msmfb_data plane2_data; +}; + +struct msmfb_img { + uint32_t width; + uint32_t height; + uint32_t format; +}; + +#define MSMFB_WRITEBACK_DEQUEUE_BLOCKING 0x1 +struct msmfb_writeback_data { + struct msmfb_data buf_info; + struct msmfb_img img; +}; + +struct dpp_ctrl { + /* + *'sharp_strength' has inputs = -128 <-> 127 + * Increasingly positive values correlate with increasingly sharper + * picture. Increasingly negative values correlate with increasingly + * smoothed picture. + */ + int8_t sharp_strength; + int8_t hsic_params[NUM_HSIC_PARAM]; +}; + +struct mdp_overlay { + struct msmfb_img src; + struct mdp_rect src_rect; + struct mdp_rect dst_rect; + uint32_t z_order; /* stage number */ + uint32_t is_fg; /* control alpha & transp */ + uint32_t alpha; + uint32_t transp_mask; + uint32_t flags; + uint32_t id; + uint32_t user_data[8]; + struct dpp_ctrl dpp; +}; + +struct msmfb_overlay_3d { + uint32_t is_3d; + uint32_t width; + uint32_t height; +}; + + +struct msmfb_overlay_blt { + uint32_t enable; + uint32_t offset; + uint32_t width; + uint32_t height; + uint32_t bpp; +}; + +struct mdp_histogram { + uint32_t frame_cnt; + uint32_t bin_cnt; + uint32_t *r; + uint32_t *g; + uint32_t *b; +}; + + +/* + + mdp_block_type defines the identifiers for each of pipes in MDP 4.3 + + MDP_BLOCK_RESERVED is provided for backward compatibility and is + deprecated. It corresponds to DMA_P. So MDP_BLOCK_DMA_P should be used + instead. 
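
/*
 * Usage sketch (illustrative, not part of the header; the fb descriptor,
 * buffer fd and geometry below are placeholders): a client puts a YUV
 * surface on an overlay pipe by filling struct mdp_overlay with
 * id = MSMFB_NEW_REQUEST, registering it with MSMFB_OVERLAY_SET (the
 * driver writes the allocated pipe id back into .id), then queueing
 * buffers with MSMFB_OVERLAY_PLAY.
 */
struct mdp_overlay ov = {
        .src         = { .width = 1280, .height = 720, .format = MDP_Y_CBCR_H2V2 },
        .src_rect    = { 0, 0, 1280, 720 },
        .dst_rect    = { 0, 0, 800, 480 },
        .z_order     = 0,
        .alpha       = MDP_ALPHA_NOP,
        .transp_mask = MDP_TRANSP_NOP,
        .id          = MSMFB_NEW_REQUEST,
};
/*
 *   ioctl(fb_fd, MSMFB_OVERLAY_SET, &ov);          // ov.id now holds the pipe id
 *
 *   struct msmfb_overlay_data od = { .id = ov.id };
 *   od.data.memory_id = buf_fd;                    // e.g. a pmem/ion buffer fd
 *   od.data.offset    = 0;
 *   ioctl(fb_fd, MSMFB_OVERLAY_PLAY, &od);
 */
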
+ +*/ + +enum { + MDP_BLOCK_RESERVED = 0, + MDP_BLOCK_OVERLAY_0, + MDP_BLOCK_OVERLAY_1, + MDP_BLOCK_VG_1, + MDP_BLOCK_VG_2, + MDP_BLOCK_RGB_1, + MDP_BLOCK_RGB_2, + MDP_BLOCK_DMA_P, + MDP_BLOCK_DMA_S, + MDP_BLOCK_DMA_E, + MDP_BLOCK_MAX, +}; + +/* +mdp_histogram_start_req is used to provide the parameters for +histogram start request +*/ + +struct mdp_histogram_start_req { + uint32_t block; + uint8_t frame_cnt; + uint8_t bit_mask; + uint8_t num_bins; +}; + + +/* + + mdp_histogram_data is used to return the histogram data, once + the histogram is done/stopped/cance + + */ + + +struct mdp_histogram_data { + uint32_t block; + uint8_t bin_cnt; + uint32_t *c0; + uint32_t *c1; + uint32_t *c2; + uint32_t *extra_info; +}; + +struct mdp_pcc_coeff { + uint32_t c, r, g, b, rr, gg, bb, rg, gb, rb, rgb_0, rgb_1; +}; + +struct mdp_pcc_cfg_data { + uint32_t block; + uint32_t ops; + struct mdp_pcc_coeff r, g, b; +}; + +#define MDP_CSC_FLAG_ENABLE 0x1 +#define MDP_CSC_FLAG_YUV_IN 0x2 +#define MDP_CSC_FLAG_YUV_OUT 0x4 + +struct mdp_csc_cfg { + /* flags for enable CSC, toggling RGB,YUV input/output */ + uint32_t flags; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +struct mdp_csc_cfg_data { + uint32_t block; + struct mdp_csc_cfg csc_data; +}; + +enum { + mdp_lut_igc, + mdp_lut_pgc, + mdp_lut_hist, + mdp_lut_max, +}; + + +struct mdp_igc_lut_data { + uint32_t block; + uint32_t len, ops; + uint32_t *c0_c1_data; + uint32_t *c2_data; +}; + +struct mdp_ar_gc_lut_data { + uint32_t x_start; + uint32_t slope; + uint32_t offset; +}; + +struct mdp_pgc_lut_data { + uint32_t block; + uint32_t flags; + uint8_t num_r_stages; + uint8_t num_g_stages; + uint8_t num_b_stages; + struct mdp_ar_gc_lut_data *r_data; + struct mdp_ar_gc_lut_data *g_data; + struct mdp_ar_gc_lut_data *b_data; +}; + + +struct mdp_hist_lut_data { + uint32_t block; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +struct mdp_lut_cfg_data { + uint32_t lut_type; + union { + struct mdp_igc_lut_data igc_lut_data; + struct mdp_pgc_lut_data pgc_lut_data; + struct mdp_hist_lut_data hist_lut_data; + } data; +}; + +struct mdp_qseed_cfg_data { + uint32_t block; + uint32_t table_num; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +enum { + mdp_op_pcc_cfg, + mdp_op_csc_cfg, + mdp_op_lut_cfg, + mdp_op_qseed_cfg, + mdp_op_max, +}; + +struct msmfb_mdp_pp { + uint32_t op; + union { + struct mdp_pcc_cfg_data pcc_cfg_data; + struct mdp_csc_cfg_data csc_cfg_data; + struct mdp_lut_cfg_data lut_cfg_data; + struct mdp_qseed_cfg_data qseed_cfg_data; + } data; +}; + + +struct mdp_page_protection { + uint32_t page_protection; +}; + + +struct mdp_mixer_info { + int pndx; + int pnum; + int ptype; + int mixer_num; + int z_order; +}; + +#define MAX_PIPE_PER_MIXER 4 + +struct msmfb_mixer_info_req { + int mixer_num; + int cnt; + struct mdp_mixer_info info[MAX_PIPE_PER_MIXER]; +}; + +enum { + DISPLAY_SUBSYSTEM_ID, + ROTATOR_SUBSYSTEM_ID, +}; + +#ifdef __KERNEL__ + +/* get the framebuffer physical address information */ +int get_fb_phys_info(unsigned long *start, unsigned long *len, int fb_num, + int subsys_id); +struct fb_info *msm_fb_get_writeback_fb(void); +int msm_fb_writeback_init(struct fb_info *info); +int msm_fb_writeback_start(struct fb_info *info); +int msm_fb_writeback_queue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_dequeue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_stop(struct fb_info *info); +int 
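
/*
 * Usage sketch (illustrative; the frame count, bin count and the stop-ioctl
 * argument are assumptions): histogram collection on the primary DMA pipe
 * pairs the two histogram structures above with the MSMFB_HISTOGRAM_START /
 * MSMFB_HISTOGRAM / MSMFB_HISTOGRAM_STOP ioctls declared earlier in this
 * header.
 */
uint32_t c0[32], c1[32], c2[32], extra[2];

struct mdp_histogram_start_req hist_start = {
        .block     = MDP_BLOCK_DMA_P,
        .frame_cnt = 1,
        .bit_mask  = 0,
        .num_bins  = 32,
};

struct mdp_histogram_data hist = {
        .block      = MDP_BLOCK_DMA_P,
        .bin_cnt    = 32,
        .c0 = c0, .c1 = c1, .c2 = c2,
        .extra_info = extra,
};
/*
 *   ioctl(fb_fd, MSMFB_HISTOGRAM_START, &hist_start);
 *   ioctl(fb_fd, MSMFB_HISTOGRAM, &hist);          // fills c0/c1/c2 when data is ready
 *   ioctl(fb_fd, MSMFB_HISTOGRAM_STOP, &hist_start.block);
 */
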
msm_fb_writeback_terminate(struct fb_info *info); +#endif + #endif /* _MSM_MDP_H_ */ From bc9444ab0441ac1f9ef9c0815e9c74640e12da83 Mon Sep 17 00:00:00 2001 From: tytung Date: Wed, 1 Aug 2012 00:21:30 +0800 Subject: [PATCH 075/155] htcleo: updated htcleo_defconfig to tytung_HWA_r3.3 --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index ce9ee98a..2f518fa7 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sat Jun 2 00:52:50 CST 2012 +# Wed Aug 1 00:15:06 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r3" +CONFIG_LOCALVERSION="_tytung_HWA_r3.3" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From 8bdae42d959e8aa197fcc9571d12d7adaa2d6610 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 1 Aug 2012 10:55:06 +0800 Subject: [PATCH 076/155] net: netfilter: enable bandwidth control to be able to set mobile data limit --- arch/arm/configs/htcleo_defconfig | 26 +++++++++++++++++++------- net/netfilter/xt_TPROXY.c | 2 ++ net/netfilter/xt_socket.c | 2 ++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index d330e5ef..12f408f2 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Fri Jun 1 01:10:03 CST 2012 +# Fri Aug 1 01:10:03 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r2.5" +CONFIG_LOCALVERSION="_tytung_HWA_r3.3" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y @@ -604,11 +604,10 @@ CONFIG_NETFILTER_XT_CONNMARK=y # CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y -# CONFIG_NETFILTER_XT_TARGET_CT is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set CONFIG_NETFILTER_XT_TARGET_MARK=y -# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set +CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y # CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set @@ -632,7 +631,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y CONFIG_NETFILTER_XT_MATCH_HELPER=y -CONFIG_NETFILTER_XT_MATCH_HL=y +# CONFIG_NETFILTER_XT_MATCH_HL is not set CONFIG_NETFILTER_XT_MATCH_IPRANGE=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MATCH_LIMIT=y @@ -709,8 +708,21 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set -# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set 
+# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4fa12857..10640fdd 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,6 +22,7 @@ #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_TPROXY_HAVE_IPV6 1 #include @@ -29,6 +30,7 @@ #include #include #endif +*/ #include #include diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9b38fd15..0f10dfc6 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,11 +22,13 @@ #include #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_SOCKET_HAVE_IPV6 1 #include #include #endif +*/ #include From 1e188cc5e18bd168de398dd9c3dff19c3dc20464 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 2 Aug 2012 13:21:31 +0800 Subject: [PATCH 077/155] net: netfilter: enable bandwidth control to be able to set mobile data limit --- arch/arm/configs/htcleo_defconfig | 21 +++++++++++++++++---- net/netfilter/xt_TPROXY.c | 2 ++ net/netfilter/xt_socket.c | 2 ++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index d330e5ef..96d43e38 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r2.5" +CONFIG_LOCALVERSION="_tytung_HWA_r3.3_JB" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y @@ -608,7 +608,7 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=y # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set CONFIG_NETFILTER_XT_TARGET_MARK=y -# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set +CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y # CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set @@ -709,8 +709,21 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set -# CONFIG_BRIDGE_NF_EBTABLES is not set +ONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4fa12857..10640fdd 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,6 +22,7 @@ #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_TPROXY_HAVE_IPV6 1 #include @@ -29,6 +30,7 @@ #include #include #endif +*/ #include #include diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9b38fd15..0f10dfc6 
100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,11 +22,13 @@ #include #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_SOCKET_HAVE_IPV6 1 #include #include #endif +*/ #include From 55ece772cde41f9925aaaaa64664cb723e12d87e Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 2 Aug 2012 17:04:07 +0800 Subject: [PATCH 078/155] msm: kgsl: Add per context timestamp Add new ioctls for per context timestamps. Timestamp functions (read/write/wait) will now be context specific rather than only using the global timestamp. Per context timestamps is a requirement for priority based queueing. --- drivers/gpu/msm/adreno.c | 128 +++++++++----- drivers/gpu/msm/adreno_a2xx.c | 24 +-- drivers/gpu/msm/adreno_a3xx.c | 14 +- drivers/gpu/msm/adreno_drawctxt.c | 12 +- drivers/gpu/msm/adreno_drawctxt.h | 5 + drivers/gpu/msm/adreno_postmortem.c | 18 +- drivers/gpu/msm/adreno_ringbuffer.c | 128 ++++++++++---- drivers/gpu/msm/adreno_ringbuffer.h | 3 +- drivers/gpu/msm/kgsl.c | 254 ++++++++++++++++++++++------ drivers/gpu/msm/kgsl.h | 9 + drivers/gpu/msm/kgsl_device.h | 19 ++- drivers/gpu/msm/z180.c | 14 +- include/linux/msm_kgsl.h | 59 ++++++- 13 files changed, 515 insertions(+), 172 deletions(-) mode change 100644 => 100755 drivers/gpu/msm/z180.c diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 4020fefd..4e434e4c 100755 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -565,13 +565,13 @@ adreno_recover_hang(struct kgsl_device *device) struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; unsigned int timestamp; unsigned int num_rb_contents; - unsigned int bad_context; unsigned int reftimestamp; unsigned int enable_ts; unsigned int soptimestamp; unsigned int eoptimestamp; - struct adreno_context *drawctxt; + unsigned int context_id; struct kgsl_context *context; + struct adreno_context *adreno_context; int next = 0; KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n"); @@ -587,22 +587,35 @@ adreno_recover_hang(struct kgsl_device *device) ret = adreno_ringbuffer_extract(rb, rb_buffer, &num_rb_contents); if (ret) goto done; - timestamp = rb->timestamp; - KGSL_DRV_ERR(device, "Last issued timestamp: %x\n", timestamp); - kgsl_sharedmem_readl(&device->memstore, &bad_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, "Last context unknown id:%d\n", + context_id); + context_id = KGSL_MEMSTORE_GLOBAL; + } + + timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp); + kgsl_sharedmem_readl(&device->memstore, &reftimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); kgsl_sharedmem_readl(&device->memstore, &enable_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable)); kgsl_sharedmem_readl(&device->memstore, &soptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + soptimestamp)); kgsl_sharedmem_readl(&device->memstore, &eoptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); /* Make sure memory is synchronized before restarting the GPU */ mb(); KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", 
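
/*
 * Background sketch (the exact definitions live in kgsl headers not shown
 * in this hunk; treat the macro below as an approximation): the memstore
 * is now indexed as an array of per-context slots instead of one shared
 * record. Slot 0 (KGSL_MEMSTORE_GLOBAL) keeps the device-wide timestamps
 * and slots 1..KGSL_MEMSTORE_MAX-1 belong to individual contexts, so the
 * old KGSL_DEVICE_MEMSTORE_OFFSET(field) becomes roughly:
 *
 *   #define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \
 *           ((ctxt_id) * sizeof(struct kgsl_devmemstore) + \
 *            offsetof(struct kgsl_devmemstore, field))
 *
 * With that, soptimestamp/eoptimestamp/ref_wait_ts/ts_cmp_enable exist once
 * per context id rather than once per device, which is what the recovery
 * code above reads and restores.
 */
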
bad_context); + "Context id that caused a GPU hang: %d\n", context_id); /* restart device */ ret = adreno_stop(device); if (ret) @@ -613,20 +626,20 @@ adreno_recover_hang(struct kgsl_device *device) KGSL_DRV_ERR(device, "Device has been restarted after hang\n"); /* Restore timestamp states */ kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp), eoptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); if (num_rb_contents) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), + KGSL_MEMSTORE_OFFSET(context_id, ref_wait_ts), reftimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable), enable_ts); } /* Make sure all writes are posted before the GPU reads them */ @@ -634,12 +647,12 @@ adreno_recover_hang(struct kgsl_device *device) /* Mark the invalid context so no more commands are accepted from * that context */ - drawctxt = (struct adreno_context *) bad_context; + adreno_context = context->devctxt; KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", bad_context); + "Context that caused a GPU hang: %d\n", adreno_context->id); - drawctxt->flags |= CTXT_FLAGS_GPU_HANG; + adreno_context->flags |= CTXT_FLAGS_GPU_HANG; /* * Set the reset status of all contexts to @@ -649,7 +662,7 @@ adreno_recover_hang(struct kgsl_device *device) while ((context = idr_get_next(&device->context_idr, &next))) { if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT != context->reset_status) { - if (context->devctxt != drawctxt) + if (context->id != context_id) context->reset_status = KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; else @@ -661,7 +674,7 @@ adreno_recover_hang(struct kgsl_device *device) /* Restore valid commands in ringbuffer */ adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents); - rb->timestamp = timestamp; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp; done: vfree(rb_buffer); return ret; @@ -755,7 +768,8 @@ static int adreno_getproperty(struct kgsl_device *device, shadowprop.size = device->memstore.size; /* GSL needs this to be set, even if it appears to be meaningless */ - shadowprop.flags = KGSL_FLAGS_INITIALIZED; + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; } if (copy_to_user(value, &shadowprop, sizeof(shadowprop))) { @@ -1011,38 +1025,58 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, __raw_writel(value, reg); } +static unsigned int _get_context_id(struct kgsl_context *k_ctxt) +{ + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + + if (k_ctxt != NULL) { + struct adreno_context *a_ctxt = k_ctxt->devctxt; + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. 
+ */ + if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = a_ctxt->id; + } + + return context_id; +} + static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, - unsigned int timestamp) + struct kgsl_context *context, unsigned int timestamp) { int status; unsigned int ref_ts, enableflag; + unsigned int context_id = _get_context_id(context); - status = kgsl_check_timestamp(device, timestamp); + status = kgsl_check_timestamp(device, context, timestamp); if (!status) { mutex_lock(&device->mutex); kgsl_sharedmem_readl(&device->memstore, &enableflag, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable)); mb(); if (enableflag) { kgsl_sharedmem_readl(&device->memstore, &ref_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); mb(); if (timestamp_cmp(ref_ts, timestamp) >= 0) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); wmb(); } } else { unsigned int cmds[2]; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); enableflag = 1; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), enableflag); wmb(); /* submit a dummy packet so that even if all * commands upto timestamp get executed we will still @@ -1076,6 +1110,7 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, /* MUST be called with the device mutex held */ static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -1087,15 +1122,19 @@ static int adreno_waittimestamp(struct kgsl_device *device, int retries; unsigned int msecs_first; unsigned int msecs_part; + unsigned int ts_issued; + unsigned int context_id = _get_context_id(context); + + ts_issued = adreno_dev->ringbuffer.timestamp[context_id]; /* Don't wait forever, set a max value for now */ if (msecs == -1) msecs = adreno_dev->wait_timeout; - if (timestamp_cmp(timestamp, adreno_dev->ringbuffer.timestamp) > 0) { - KGSL_DRV_ERR(device, "Cannot wait for invalid ts: %x, " - "rb->timestamp: %x\n", - timestamp, adreno_dev->ringbuffer.timestamp); + if (timestamp_cmp(timestamp, ts_issued) > 0) { + KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, " + "last issued ts <%d:0x%x>\n", + context_id, timestamp, context_id, ts_issued); status = -EINVAL; goto done; } @@ -1107,7 +1146,7 @@ static int adreno_waittimestamp(struct kgsl_device *device, msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; msecs_part = (msecs - msecs_first + 3) / 4; for (retries = 0; retries < 5; retries++) { - if (kgsl_check_timestamp(device, timestamp)) { + if (kgsl_check_timestamp(device, context, timestamp)) { /* if the timestamp happens while we're not * waiting, there's a chance that an interrupt * will not be generated and thus the timestamp @@ -1130,7 +1169,7 @@ static int adreno_waittimestamp(struct kgsl_device *device, status = kgsl_wait_event_interruptible_timeout( device->wait_queue, kgsl_check_interrupt_timestamp(device, - timestamp), + context, timestamp), msecs_to_jiffies(retries ? 
msecs_part : msecs_first), io); mutex_lock(&device->mutex); @@ -1147,9 +1186,10 @@ static int adreno_waittimestamp(struct kgsl_device *device, } status = -ETIMEDOUT; KGSL_DRV_ERR(device, - "Device hang detected while waiting for timestamp: %x," - "last submitted(rb->timestamp): %x, wptr: %x\n", - timestamp, adreno_dev->ringbuffer.timestamp, + "Device hang detected while waiting for timestamp: " + "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, " + "wptr: 0x%x\n", + context_id, timestamp, context_id, ts_issued, adreno_dev->ringbuffer.wptr); if (!adreno_dump_and_recover(device)) { /* wait for idle after recovery as the @@ -1163,15 +1203,17 @@ done: } static unsigned int adreno_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { unsigned int timestamp = 0; + unsigned int context_id = _get_context_id(context); if (type == KGSL_TIMESTAMP_CONSUMED) adreno_regread(device, REG_CP_TIMESTAMP, ×tamp); else if (type == KGSL_TIMESTAMP_RETIRED) kgsl_sharedmem_readl(&device->memstore, ×tamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); rmb(); return timestamp; diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index cc611779..dc43062e 100755 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1427,8 +1427,8 @@ static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context); - cmds[4] = (unsigned int) context; + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); @@ -1551,11 +1551,18 @@ static void a2xx_cp_intrcallback(struct kgsl_device *device) if (status & CP_INT_CNTL__RB_INT_MASK) { /* signal intr completion event */ - unsigned int enableflag = 0; + unsigned int context_id; + kgsl_sharedmem_readl(&device->memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { kgsl_sharedmem_writel(&rb->device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); + device->last_expired_ctxt_id = context_id; wmb(); + } KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); } @@ -1780,7 +1787,6 @@ static void a2xx_gmeminit(struct adreno_device *adreno_dev) static void a2xx_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; - int init_reftimestamp = 0x7fffffff; /* * We need to make sure all blocks are powered up and clocked @@ -1833,12 +1839,6 @@ static void a2xx_start(struct adreno_device *adreno_dev) else adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); - kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - - kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - init_reftimestamp); - adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); /* Make sure interrupts are disabled */ diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c index cbc7bed4..507ad02e 100755 --- a/drivers/gpu/msm/adreno_a3xx.c +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -2222,8 +2222,8 @@ static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, cmds[1] = 
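
/*
 * Worked example for the wait split above (illustrative): with the default
 * wait_timeout of 10000 ms set in adreno.c, msecs_first = 100 and
 * msecs_part = (10000 - 100 + 3) / 4 = 2475, so the five polling rounds
 * wait 100 + 4 * 2475 = 10000 ms in total. Requests of 100 ms or less use
 * (msecs + 4) / 5 for the first round instead, so a timestamp that retires
 * without raising an interrupt is still noticed quickly.
 */
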
KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context); - cmds[4] = (unsigned int)context; + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); @@ -2366,9 +2366,17 @@ static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; if (irq == A3XX_INT_CP_RB_INT) { + unsigned int context_id; + kgsl_sharedmem_readl(&adreno_dev->dev.memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { kgsl_sharedmem_writel(&rb->device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); wmb(); + } KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); } diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index 87f9efe4..fc4789ad 100755 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,6 +17,8 @@ #include "kgsl_sharedmem.h" #include "adreno.h" +#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF + /* quad for copying GMEM to context shadow */ #define QUAD_LEN 12 #define QUAD_RESTORE_LEN 14 @@ -154,6 +156,7 @@ int adreno_drawctxt_create(struct kgsl_device *device, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; + drawctxt->id = context->id; if (flags & KGSL_CONTEXT_PREAMBLE) drawctxt->flags |= CTXT_FLAGS_PREAMBLE; @@ -161,10 +164,17 @@ int adreno_drawctxt_create(struct kgsl_device *device, if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC) drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; + if (flags & KGSL_CONTEXT_PER_CONTEXT_TS) + drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS; + ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt); if (ret) goto err; + kgsl_sharedmem_writel(&device->memstore, + KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts), + KGSL_INIT_REFTIMESTAMP); + context->devctxt = drawctxt; return 0; err: diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h index 50ee3450..61198ebd 100755 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -40,6 +40,10 @@ #define CTXT_FLAGS_GPU_HANG 0x00008000 /* Specifies there is no need to save GMEM */ #define CTXT_FLAGS_NOGMEMALLOC 0x00010000 +/* Trash state for context */ +#define CTXT_FLAGS_TRASHSTATE 0x00020000 +/* per context timestamps enabled */ +#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000 struct kgsl_device; struct adreno_device; @@ -72,6 +76,7 @@ struct gmem_shadow_t { }; struct adreno_context { + unsigned int id; uint32_t flags; struct kgsl_pagetable *pagetable; struct kgsl_memdesc gpustate; diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c index 7e073fd9..427741f1 100755 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -14,6 +14,7 @@ #include #include "kgsl.h" +#include "kgsl_sharedmem.h" #include "adreno.h" #include "adreno_pm4types.h" @@ -464,7 +465,9 @@ static int adreno_dump(struct kgsl_device *device) 
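
/*
 * Usage sketch (illustrative; the ioctl and struct names are the standard
 * kgsl draw-context creation interface and are not part of this hunk): a
 * context opts in to per-context timestamps at creation time, which is
 * what sets CTXT_FLAGS_PER_CONTEXT_TS above.
 *
 *   struct kgsl_drawctxt_create req = {
 *           .flags = KGSL_CONTEXT_PER_CONTEXT_TS,
 *   };
 *   ioctl(fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
 *   // req.drawctxt_id now identifies this context's memstore slot, and
 *   // timestamps returned by issueibcmds count in that context's sequence.
 *
 * Contexts created without the flag keep using the global timestamp, which
 * is the fallback that _get_context_id() and addcmds() implement.
 */
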
const uint32_t *rb_vaddr; int num_item = 0; int read_idx, write_idx; - unsigned int ts_processed; + unsigned int ts_processed = 0xdeaddead; + struct kgsl_context *context; + unsigned int context_id; static struct ib_list ib_list; @@ -660,9 +663,18 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "MH_INTERRUPT: MASK = %08X | STATUS = %08X\n", r1, r2); - ts_processed = device->ftbl->readtimestamp(device, + kgsl_sharedmem_readl(&device->memstore, + (unsigned int *) &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context) { + ts_processed = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); - KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed); + KGSL_LOG_DUMP(device, "CTXT: %d TIMESTM RTRD: %08X\n", + context->id, ts_processed); + } else + KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id); num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer, cp_rb_rptr); diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 71f239cc..da80576f 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -236,7 +236,7 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) return 0; if (init_ram) { - rb->timestamp = 0; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0; GSL_RB_INIT_TIMESTAMP(rb); } @@ -321,18 +321,13 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) } /* setup scratch/timestamp */ - adreno_regwrite(device, REG_SCRATCH_ADDR, - device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); + adreno_regwrite(device, REG_SCRATCH_ADDR, device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + soptimestamp)); adreno_regwrite(device, REG_SCRATCH_UMSK, GSL_RB_MEMPTRS_SCRATCH_MASK); - /* update the eoptimestamp field with the last retired timestamp */ - kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), - rb->timestamp); - /* load the CP ucode */ status = adreno_ringbuffer_load_pm4_ucode(device); @@ -431,15 +426,28 @@ void adreno_ringbuffer_close(struct adreno_ringbuffer *rb) static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + struct adreno_context *context, unsigned int flags, unsigned int *cmds, int sizedwords) { struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; - unsigned int total_sizedwords = sizedwords + 6; + unsigned int total_sizedwords = sizedwords; unsigned int i; unsigned int rcmd_gpu; + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + unsigned int gpuaddr = rb->device->memstore.gpuaddr; + + if (context != NULL) { + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. 
+ */ + if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = context->id; + } /* reserve space to temporarily turn off protected mode * error checking if needed @@ -451,6 +459,13 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, if (adreno_is_a3xx(adreno_dev)) total_sizedwords += 7; + total_sizedwords += 2; /* scratchpad ts for recovery */ + if (context) { + total_sizedwords += 3; /* sop timestamp */ + total_sizedwords += 4; /* eop timestamp */ + } + total_sizedwords += 4; /* global timestamp for recovery*/ + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); @@ -478,12 +493,20 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } - rb->timestamp++; - timestamp = rb->timestamp; + /* always increment the global timestamp. once. */ + rb->timestamp[KGSL_MEMSTORE_GLOBAL]++; + if (context) { + if (context_id == KGSL_MEMSTORE_GLOBAL) + rb->timestamp[context_id] = + rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + else + rb->timestamp[context_id]++; + } + timestamp = rb->timestamp[context_id]; - /* start-of-pipeline and end-of-pipeline timestamps */ + /* scratchpad ts for recovery */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); if (adreno_is_a3xx(adreno_dev)) { /* @@ -499,22 +522,41 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); } + if (context) { + /* start-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_MEM_WRITE, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + + /* end-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 3)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + } + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); - GSL_RB_WRITE(ringcmds, rcmd_gpu, - (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp))); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_COND_EXEC, 4)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ts_cmp_enable)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ref_wait_ts)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -533,7 +575,6 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, 
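
/*
 * Command-stream sketch (illustrative): for a context submitting with
 * per-context timestamps, addcmds() above now appends roughly the
 * following after the caller's commands:
 *
 *   CP type-0 write to REG_CP_TIMESTAMP           (scratchpad ts, recovery)
 *   CP_MEM_WRITE     -> memstore[ctx].soptimestamp    = ts
 *   CP_EVENT_WRITE(CACHE_FLUSH_TS)
 *                    -> memstore[ctx].eoptimestamp    = ts
 *   CP_EVENT_WRITE(CACHE_FLUSH_TS)
 *                    -> memstore[GLOBAL].eoptimestamp = global ts
 *   CP_COND_EXEC on memstore[ctx].ts_cmp_enable / ref_wait_ts,
 *                    guarding the ringbuffer interrupt packet
 *
 * The global slot is still written on every submission, so hang recovery
 * and idle checks keep a single monotonically increasing reference while
 * waiters compare against the per-context slot.
 */
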
adreno_ringbuffer_submit(rb); - /* return timestamp of issued coREG_ands */ return timestamp; } @@ -548,7 +589,7 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, if (device->state & KGSL_STATE_HUNG) return; - adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords); + adreno_ringbuffer_addcmds(rb, NULL, flags, cmds, sizedwords); } static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, @@ -769,8 +810,8 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) { KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.." - " will not accept commands for this context\n", - drawctxt); + " will not accept commands for context %d\n", + drawctxt, drawctxt->id); return -EDEADLK; } @@ -822,6 +863,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, adreno_drawctxt_switch(adreno_dev, drawctxt, flags); *timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer, + drawctxt, KGSL_CMD_FLAGS_NOT_KERNEL_CMD, &link[0], (cmds - link)); @@ -855,11 +897,25 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, unsigned int val2; unsigned int val3; unsigned int copy_rb_contents = 0; - unsigned int cur_context; + struct kgsl_context *context; + unsigned int context_id; GSL_RB_GET_READPTR(rb, &rb->rptr); - retired_timestamp = device->ftbl->readtimestamp(device, + /* current_context is the context that is presently active in the + * GPU, i.e the context in which the hang is caused */ + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + KGSL_DRV_ERR(device, "Last context id: %d\n", context_id); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, + "GPU recovery from hang not possible because last" + " context id is invalid.\n"); + return -EINVAL; + } + retired_timestamp = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n", retired_timestamp); @@ -894,7 +950,8 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, (val1 == cp_type3_packet(CP_EVENT_WRITE, 3) && val2 == CACHE_FLUSH_TS && val3 == (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)))) { + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)))) { rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); KGSL_DRV_ERR(device, @@ -940,10 +997,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, return -EINVAL; } - /* current_context is the context that is presently active in the - * GPU, i.e the context in which the hang is caused */ - kgsl_sharedmem_readl(&device->memstore, &cur_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) { kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, @@ -958,7 +1011,8 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); BUG_ON(val1 != (device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context))); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context))); kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); @@ -970,7 +1024,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, * and leave. 
*/ - if ((copy_rb_contents == 0) && (value == cur_context)) { + if ((copy_rb_contents == 0) && (value == context_id)) { KGSL_DRV_ERR(device, "GPU recovery could not " "find the previous context\n"); return -EINVAL; @@ -986,7 +1040,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, /* if context switches to a context that did not cause * hang then start saving the rb contents as those * commands can be executed */ - if (value != cur_context) { + if (value != context_id) { copy_rb_contents = 1; temp_rb_buffer[temp_idx++] = cp_nop_packet(1); temp_rb_buffer[temp_idx++] = diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index d0110b9f..7c93b3b5 100755 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -54,7 +54,8 @@ struct adreno_ringbuffer { unsigned int wptr; /* write pointer offset in dwords from baseaddr */ unsigned int rptr; /* read pointer offset in dwords from baseaddr */ - uint32_t timestamp; + + unsigned int timestamp[KGSL_MEMSTORE_MAX]; }; diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index e7c4ff8b..491944ba 100755 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -58,22 +58,30 @@ static struct ion_client *kgsl_ion_client; * @returns - 0 on success or error code on failure */ -static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv, +static int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, + void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv, struct kgsl_device_private *owner) { struct kgsl_event *event; struct list_head *n; - unsigned int cur = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); + unsigned int cur_ts; + struct kgsl_context *context = NULL; if (cb == NULL) return -EINVAL; + if (id != KGSL_MEMSTORE_GLOBAL) { + context = idr_find(&device->context_idr, id); + if (context == NULL) + return -EINVAL; + } + cur_ts = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + /* Check to see if the requested timestamp has already fired */ - if (timestamp_cmp(cur, ts) >= 0) { - cb(device, priv, cur); + if (timestamp_cmp(cur_ts, ts) >= 0) { + cb(device, priv, id, cur_ts); return 0; } @@ -81,17 +89,24 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (event == NULL) return -ENOMEM; + event->context = context; event->timestamp = ts; event->priv = priv; event->func = cb; event->owner = owner; - /* Add the event in order to the list */ + /* + * Add the event in order to the list. Order is by context id + * first and then by timestamp for that context. + */ for (n = device->events.next ; n != &device->events; n = n->next) { struct kgsl_event *e = list_entry(n, struct kgsl_event, list); + if (e->context != context) + continue; + if (timestamp_cmp(e->timestamp, ts) > 0) { list_add(&event->list, n->prev); break; @@ -115,12 +130,16 @@ static void kgsl_cancel_events(struct kgsl_device *device, struct kgsl_device_private *owner) { struct kgsl_event *event, *event_tmp; - unsigned int cur = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); + unsigned int id, cur; list_for_each_entry_safe(event, event_tmp, &device->events, list) { if (event->owner != owner) continue; + + cur = device->ftbl->readtimestamp(device, event->context, + KGSL_TIMESTAMP_RETIRED); + + id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL; /* * "cancel" the events by calling their callback. 
* Currently, events are used for lock and memory @@ -128,7 +147,7 @@ static void kgsl_cancel_events(struct kgsl_device *device, * thing to do is release or free. */ if (event->func) - event->func(device, event->priv, cur); + event->func(device, event->priv, id, cur); list_del(&event->list); kfree(event); @@ -265,8 +284,8 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } - ret = idr_get_new(&dev_priv->device->context_idr, - context, &id); + ret = idr_get_new_above(&dev_priv->device->context_idr, + context, 1, &id); if (ret != -EAGAIN) break; @@ -277,6 +296,16 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } + /* MAX - 1, there is one memdesc in memstore for device info */ + if (id >= KGSL_MEMSTORE_MAX) { + KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d " + "ctxts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + idr_remove(&dev_priv->device->context_idr, id); + kfree(context); + return NULL; + } + context->id = id; context->dev_priv = dev_priv; @@ -307,25 +336,28 @@ static void kgsl_timestamp_expired(struct work_struct *work) ts_expired_ws); struct kgsl_event *event, *event_tmp; uint32_t ts_processed; + unsigned int id; mutex_lock(&device->mutex); - /* get current EOP timestamp */ - ts_processed = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); - /* Process expired events */ list_for_each_entry_safe(event, event_tmp, &device->events, list) { + ts_processed = device->ftbl->readtimestamp(device, + event->context, KGSL_TIMESTAMP_RETIRED); if (timestamp_cmp(ts_processed, event->timestamp) < 0) - break; + continue; + + id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL; if (event->func) - event->func(device, event->priv, ts_processed); + event->func(device, event->priv, id, ts_processed); list_del(&event->list); kfree(event); } + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + mutex_unlock(&device->mutex); } @@ -400,11 +432,15 @@ int kgsl_unregister_ts_notifier(struct kgsl_device *device, } EXPORT_SYMBOL(kgsl_unregister_ts_notifier); -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp) +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) { unsigned int ts_processed; + unsigned int global; - ts_processed = device->ftbl->readtimestamp(device, + ts_processed = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + global = device->ftbl->readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED); return (timestamp_cmp(ts_processed, timestamp) >= 0); @@ -745,6 +781,9 @@ static int kgsl_open(struct inode *inodep, struct file *filep) kgsl_check_suspended(device); if (device->open_count == 0) { + kgsl_sharedmem_set(&device->memstore, 0, 0, + device->memstore.size); + result = device->ftbl->start(device, true); if (result) { @@ -885,21 +924,35 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, return result; } -static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) +static long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) { int result = 0; - struct kgsl_device_waittimestamp *param = data; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; - /* Set the active count so that suspend doesn't do the - wrong thing */ + if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + 
dev_priv->device, param->type, + param->value, param->sizebytes); + + return result; +} + +static long _device_waittimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int timeout) +{ + int result = 0; + + /* Set the active count so that suspend doesn't do the wrong thing */ dev_priv->device->active_cnt++; result = dev_priv->device->ftbl->waittimestamp(dev_priv->device, - param->timestamp, - param->timeout); + context, timestamp, timeout); + /* Fire off any pending suspend operations that are in flight */ @@ -910,6 +963,34 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private return result; } +static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp *param = data; + + return _device_waittimestamp(dev_priv, KGSL_MEMSTORE_GLOBAL, + param->timestamp, param->timeout); +} + +static long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; + } + + return _device_waittimestamp(dev_priv, context, + param->timestamp, param->timeout); +} + static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -926,7 +1007,7 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, if (context == NULL) { result = -EINVAL; KGSL_DRV_ERR(dev_priv->device, - "invalid drawctxt drawctxt_id %d\n", + "invalid context_id %d\n", param->drawctxt_id); goto done; } @@ -997,21 +1078,46 @@ done: return result; } +static long _cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, unsigned int type, + unsigned int *timestamp) +{ + *timestamp = dev_priv->device->ftbl->readtimestamp(dev_priv->device, + context, type); + + return 0; +} + static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { struct kgsl_cmdstream_readtimestamp *param = data; - param->timestamp = - dev_priv->device->ftbl->readtimestamp(dev_priv->device, - param->type); + return _cmdstream_readtimestamp(dev_priv, NULL, + param->type, ¶m->timestamp); +} - return 0; +static long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; + } + + return _cmdstream_readtimestamp(dev_priv, context, + param->type, ¶m->timestamp); } static void kgsl_freemem_event_cb(struct kgsl_device *device, - void *priv, u32 timestamp) + void *priv, u32 id, u32 timestamp) { struct kgsl_mem_entry *entry = priv; spin_lock(&entry->priv->mem_lock); @@ -1020,30 +1126,65 @@ static void kgsl_freemem_event_cb(struct kgsl_device *device, kgsl_mem_entry_detach_process(entry); } -static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) +static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, + 
unsigned int gpuaddr, struct kgsl_context *context, + unsigned int timestamp, unsigned int type) { int result = 0; - struct kgsl_cmdstream_freememontimestamp *param = data; struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + unsigned int cur; + unsigned int context_id = context ? context->id : KGSL_MEMSTORE_GLOBAL; spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr); + entry = kgsl_sharedmem_find(dev_priv->process_priv, gpuaddr); spin_unlock(&dev_priv->process_priv->mem_lock); if (entry) { - result = kgsl_add_event(dev_priv->device, param->timestamp, - kgsl_freemem_event_cb, entry, dev_priv); + cur = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + + result = kgsl_add_event(dev_priv->device, context_id, + timestamp, kgsl_freemem_event_cb, + entry, dev_priv); } else { KGSL_DRV_ERR(dev_priv->device, - "invalid gpuaddr %08x\n", param->gpuaddr); + "invalid gpuaddr %08x\n", gpuaddr); result = -EINVAL; } return result; } +static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp *param = data; + + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + NULL, param->timestamp, param->type); +} + +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + KGSL_DRV_ERR(dev_priv->device, + "invalid drawctxt context_id %d\n", param->context_id); + return -EINVAL; + } + + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + context, param->timestamp, param->type); +} + static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -1760,13 +1901,14 @@ struct kgsl_genlock_event_priv { * kgsl_genlock_event_cb - Event callback for a genlock timestamp event * @device - The KGSL device that expired the timestamp * @priv - private data for the event + * @context_id - the context id that goes with the timestamp * @timestamp - the timestamp that triggered the event * * Release a genlock lock following the expiration of a timestamp */ static void kgsl_genlock_event_cb(struct kgsl_device *device, - void *priv, u32 timestamp) + void *priv, u32 context_id, u32 timestamp) { struct kgsl_genlock_event_priv *ev = priv; int ret; @@ -1794,7 +1936,7 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, */ static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len, + u32 context_id, u32 timestamp, void __user *data, int len, struct kgsl_device_private *owner) { struct kgsl_genlock_event_priv *event; @@ -1820,8 +1962,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, return ret; } - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event, - owner); + ret = kgsl_add_event(device, context_id, timestamp, + kgsl_genlock_event_cb, event, owner); if (ret) kfree(event); @@ -1829,7 +1971,7 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, } #else static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len, + u32 context_id, u32 timestamp, void __user *data, int len, struct kgsl_device_private *owner) { return -EINVAL; @@ 
-1853,8 +1995,8 @@ static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, switch (param->type) { case KGSL_TIMESTAMP_EVENT_GENLOCK: ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len, - dev_priv); + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); break; default: ret = -EINVAL; @@ -1878,12 +2020,18 @@ static const struct { kgsl_ioctl_device_getproperty, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP, kgsl_ioctl_device_waittimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, kgsl_ioctl_rb_issueibcmds, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP, kgsl_ioctl_cmdstream_readtimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP, kgsl_ioctl_cmdstream_freememontimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, kgsl_ioctl_drawctxt_create, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, @@ -1906,6 +2054,8 @@ static const struct { kgsl_ioctl_cff_user_event, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, kgsl_ioctl_timestamp_event, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -2211,13 +2361,13 @@ kgsl_register_device(struct kgsl_device *device) INIT_LIST_HEAD(&device->events); + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + ret = kgsl_mmu_init(device); if (ret != 0) goto err_dest_work_q; - ret = kgsl_allocate_contiguous(&device->memstore, - sizeof(struct kgsl_devmemstore)); - + ret = kgsl_allocate_contiguous(&device->memstore, KGSL_MEMSTORE_SIZE); if (ret != 0) goto err_close_mmu; diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 3f9ff843..aff17338 100755 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -25,6 +25,14 @@ #define KGSL_NAME "kgsl" +/* The number of memstore arrays limits the number of contexts allowed. + * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1) + /* Timestamp window used to detect rollovers */ #define KGSL_TIMESTAMP_WINDOW 0x80000000 @@ -150,6 +158,7 @@ struct kgsl_mem_entry { void *priv_data; struct list_head list; uint32_t free_timestamp; + unsigned int context_id; /* back pointer to private structure under whose context this * allocation is made */ struct kgsl_process_private *priv; diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index ff78ae3f..45e41d91 100755 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -47,6 +46,7 @@ #define KGSL_STATE_SUSPEND 0x00000010 #define KGSL_STATE_HUNG 0x00000020 #define KGSL_STATE_DUMP_AND_RECOVER 0x00000040 +#define KGSL_STATE_SLUMBER 0x00000080 #define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000 @@ -76,9 +76,10 @@ struct kgsl_functable { enum kgsl_property_type type, void *value, unsigned int sizebytes); int (*waittimestamp) (struct kgsl_device *device, - unsigned int timestamp, unsigned int msecs); + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); unsigned int (*readtimestamp) (struct kgsl_device *device, - enum kgsl_timestamp_type type); + struct kgsl_context *context, enum kgsl_timestamp_type type); int (*issueibcmds) (struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, unsigned int sizedwords, uint32_t *timestamp, @@ -101,6 +102,9 @@ struct kgsl_functable { struct kgsl_context *context); long (*ioctl) (struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); + int (*setproperty) (struct kgsl_device *device, + enum kgsl_property_type type, void *value, + unsigned int sizebytes); }; struct kgsl_memregion { @@ -120,8 +124,9 @@ struct kgsl_mh { }; struct kgsl_event { + struct kgsl_context *context; uint32_t timestamp; - void (*func)(struct kgsl_device *, void *, u32); + void (*func)(struct kgsl_device *, void *, u32, u32); void *priv; struct list_head list; struct kgsl_device_private *owner; @@ -153,6 +158,7 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; + unsigned int last_expired_ctxt_id; unsigned int active_cnt; struct completion suspend_gate; @@ -304,7 +310,8 @@ kgsl_find_context(struct kgsl_device_private *dev_priv, uint32_t id) return (ctxt && ctxt->dev_priv == dev_priv) ? 
ctxt : NULL; } -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp); +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); int kgsl_register_ts_notifier(struct kgsl_device *device, struct notifier_block *nb); diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c old mode 100644 new mode 100755 index e7a1d521..688f23d8 --- a/drivers/gpu/msm/z180.c +++ b/drivers/gpu/msm/z180.c @@ -100,6 +100,7 @@ enum z180_cmdwindow_type { static int z180_start(struct kgsl_device *device, unsigned int init_ram); static int z180_stop(struct kgsl_device *device); static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs); static void z180_regread(struct kgsl_device *device, @@ -382,8 +383,8 @@ static int z180_idle(struct kgsl_device *device, unsigned int timeout) if (timestamp_cmp(z180_dev->current_timestamp, z180_dev->timestamp) > 0) - status = z180_wait(device, z180_dev->current_timestamp, - timeout); + status = z180_wait(device, NULL, + z180_dev->current_timestamp, timeout); if (status) KGSL_DRV_ERR(device, "z180_waittimestamp() timed out\n"); @@ -793,14 +794,16 @@ static void z180_cmdwindow_write(struct kgsl_device *device, } static unsigned int z180_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { struct z180_device *z180_dev = Z180_DEVICE(device); + (void)context; /* get current EOP timestamp */ return z180_dev->timestamp; } static int z180_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -811,13 +814,14 @@ static int z180_waittimestamp(struct kgsl_device *device, msecs = 10 * MSEC_PER_SEC; mutex_unlock(&device->mutex); - status = z180_wait(device, timestamp, msecs); + status = z180_wait(device, context, timestamp, msecs); mutex_lock(&device->mutex); return status; } static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -826,7 +830,7 @@ static int z180_wait(struct kgsl_device *device, timeout = wait_io_event_interruptible_timeout( device->wait_queue, - kgsl_check_timestamp(device, timestamp), + kgsl_check_timestamp(device, context, timestamp), msecs_to_jiffies(msecs)); if (timeout > 0) diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index 36357e08..92b41a5d 100755 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -35,14 +35,18 @@ #define _MSM_KGSL_H #define KGSL_VERSION_MAJOR 3 -#define KGSL_VERSION_MINOR 8 +#define KGSL_VERSION_MINOR 10 /*context flags */ -#define KGSL_CONTEXT_SAVE_GMEM 1 -#define KGSL_CONTEXT_NO_GMEM_ALLOC 2 -#define KGSL_CONTEXT_SUBMIT_IB_LIST 4 -#define KGSL_CONTEXT_CTX_SWITCH 8 -#define KGSL_CONTEXT_PREAMBLE 16 +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 + +#define KGSL_CONTEXT_INVALID 0xffffffff /* Memory allocayion flags */ #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000 @@ -58,6 +62,7 @@ #define KGSL_FLAGS_RESERVED1 0x00000040 #define KGSL_FLAGS_RESERVED2 0x00000080 #define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 /* Clock flags to show which clocks 
should be controled by a given platform */ #define KGSL_CLK_SRC 0x00000001 @@ -132,9 +137,9 @@ struct kgsl_devmemstore { unsigned int sbz5; }; -#define KGSL_DEVICE_MEMSTORE_OFFSET(field) \ - offsetof(struct kgsl_devmemstore, field) - +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) /* timestamp id*/ enum kgsl_timestamp_type { @@ -268,6 +273,14 @@ struct kgsl_device_waittimestamp { #define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) /* issue indirect commands to the GPU. * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE @@ -361,6 +374,26 @@ struct kgsl_map_user_mem { #define IOCTL_KGSL_MAP_USER_MEM \ _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned int gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + /* add a block of pmem or fb into the GPU address space */ struct kgsl_sharedmem_from_pmem { int pmem_fd; @@ -504,6 +537,14 @@ struct kgsl_timestamp_event_genlock { int handle; /* Handle of the genlock lock to release */ }; +/* + * Set a property within the kernel. 
Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + #ifdef __KERNEL__ #ifdef CONFIG_MSM_KGSL_DRM int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, From 83bc93927fbc3e5d1452846e5e1d17551e3fca0f Mon Sep 17 00:00:00 2001 From: marc1706 Date: Sun, 5 Aug 2012 22:55:20 +0200 Subject: [PATCH 079/155] power: ds2746: Add procfs interface for fast charge --- drivers/power/ds2746_battery.c | 70 ++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c index 5f071a39..885ed338 100644 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -46,6 +46,8 @@ Original Auther: /*#include "../w1/slaves/w1_ds2784.h"*/ #include #include +#include +#include struct ds2746_device_info { @@ -64,6 +66,45 @@ struct ds2746_device_info { }; static struct wake_lock vbus_wake_lock; +/* + * proc_fs interface for fast charge + * by marc1706 + */ +#define PROC_FAST_CHARGE_NAME "fast_charge" + +static struct proc_dir_entry *fast_charge; +static int allow_fast_charge = 0; + +static int proc_read_fast_charge(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int ret; + + ret = sprintf(page, "%i\n", allow_fast_charge); + + return ret; +} + +static int proc_write_fast_charge(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char temp_buff[count + 1]; + int ret; + int len = count; + + if (copy_from_user(temp_buff, buffer, len)) + return -EFAULT; + + sscanf(temp_buff, "%i", &ret); + + if (!ret || ret == 1) + allow_fast_charge = ret; + else + printk(KERN_ALERT "%s: Incorrect value:%i\n", __func__, ret); + + return ret; +} + /*======================================================================================== HTC power algorithm helper member and functions @@ -279,10 +320,18 @@ static BOOL is_charging_avaiable(void) static BOOL is_high_current_charging_avaialable(void) { - if (!poweralg.protect_flags.is_charging_high_current_avaialble) return FALSE; - //if (!poweralg.is_china_ac_in) return FALSE; /* allow high current charging on china chargers */ - if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) return FALSE; - return TRUE; + bool ret; + + if (!poweralg.protect_flags.is_charging_high_current_avaialble) + ret = FALSE; + else if (!poweralg.is_china_ac_in && !allow_fast_charge) + ret = FALSE; + else if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) + ret = FALSE; + else + ret = TRUE; + + return ret; } static void update_next_charge_state(void) @@ -1245,6 +1294,19 @@ static int __init ds2746_battery_init(void) return ret; } + fast_charge = create_proc_entry(PROC_FAST_CHARGE_NAME, 0644, NULL); + + if (fast_charge == NULL) { + remove_proc_entry(PROC_FAST_CHARGE_NAME, NULL); + printk(KERN_ALERT "%s: Unable to create /proc/%s\n", __func__, + PROC_FAST_CHARGE_NAME); + } + fast_charge->read_proc = proc_read_fast_charge; + fast_charge->write_proc = proc_write_fast_charge; + fast_charge->uid = 0; + fast_charge->gid = 0; + printk(KERN_INFO "/proc/%s created\n", PROC_FAST_CHARGE_NAME); + /*mutex_init(&htc_batt_info.lock);*/ return platform_driver_register(&ds2746_battery_driver); } From 8d701dfec69bcf100598d5d1bb7977117e6c8afd Mon Sep 17 00:00:00 2001 From: tytung Date: Tue, 7 Aug 2012 00:04:47 +0800 Subject: [PATCH 080/155] htcleo: updated htcleo_defconfig to tytung_HWA_r3.4 --- arch/arm/configs/htcleo_defconfig | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 2f518fa7..04b77e31 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r3.3" +CONFIG_LOCALVERSION="_tytung_HWA_r3.4" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From 65b62e5ee41c10c0e780718682613c86a8e650ad Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 7 Aug 2012 00:45:11 +0800 Subject: [PATCH 081/155] base: genlock: Remove genlock_release_lock and associated ioctl base: genlock: allow synchronization with a single gralloc handle base: genlock: remove BIT macro usage --- drivers/base/genlock.c | 305 +++++++++++++++++++++++++++++++--------- include/linux/genlock.h | 11 +- 2 files changed, 251 insertions(+), 65 deletions(-) mode change 100644 => 100755 drivers/base/genlock.c mode change 100644 => 100755 include/linux/genlock.h diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c old mode 100644 new mode 100755 index afe8eb1c..a89ee7ee --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,7 +22,6 @@ #include #include #include -#include /* for in_interrupt() */ /* Lock states - can either be unlocked, held as an exclusive write lock or a * shared read lock @@ -32,6 +31,9 @@ #define _RDLOCK GENLOCK_RDLOCK #define _WRLOCK GENLOCK_WRLOCK +#define GENLOCK_LOG_ERR(fmt, args...) 
\ +pr_err("genlock: %s: " fmt, __func__, ##args) + struct genlock { struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ @@ -49,12 +51,29 @@ struct genlock_handle { taken */ }; +/* + * Create a spinlock to protect against a race condition when a lock gets + * released while another process tries to attach it + */ + +static DEFINE_SPINLOCK(genlock_file_lock); + static void genlock_destroy(struct kref *kref) { - struct genlock *lock = container_of(kref, struct genlock, - refcount); + struct genlock *lock = container_of(kref, struct genlock, + refcount); - kfree(lock); + /* + * Clear the private data for the file descriptor in case the fd is + * still active after the lock gets released + */ + + spin_lock(&genlock_file_lock); + if (lock->file) + lock->file->private_data = NULL; + spin_unlock(&genlock_file_lock); + + kfree(lock); } /* @@ -64,6 +83,15 @@ static void genlock_destroy(struct kref *kref) static int genlock_release(struct inode *inodep, struct file *file) { + struct genlock *lock = file->private_data; + /* + * Clear the refrence back to this file structure to avoid + * somehow reusing the lock after the file has been destroyed + */ + + if (lock) + lock->file = NULL; + return 0; } @@ -82,12 +110,21 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } lock = kzalloc(sizeof(*lock), GFP_KERNEL); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for a lock\n"); return ERR_PTR(-ENOMEM); + } INIT_LIST_HEAD(&lock->active); init_waitqueue_head(&lock->queue); @@ -120,8 +157,10 @@ static int genlock_get_fd(struct genlock *lock) { int ret; - if (!lock->file) + if (!lock->file) { + GENLOCK_LOG_ERR("No file attached to the lock\n"); return -EINVAL; + } ret = get_unused_fd_flags(0); if (ret < 0) @@ -143,19 +182,37 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) struct file *file; struct genlock *lock; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } file = fget(fd); - if (file == NULL) + if (file == NULL) { + GENLOCK_LOG_ERR("Bad file descriptor\n"); return ERR_PTR(-EBADF); + } + /* + * take a spinlock to avoid a race condition if the lock is + * released and then attached + */ + + spin_lock(&genlock_file_lock); lock = file->private_data; + spin_unlock(&genlock_file_lock); fput(file); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("File descriptor is invalid\n"); return ERR_PTR(-EINVAL); + } handle->lock = lock; kref_get(&lock->refcount); @@ -199,13 +256,16 @@ static int _genlock_unlock(struct genlock *lock, struct genlock_handle *handle) spin_lock_irqsave(&lock->lock, irqflags); - if (lock->state == _UNLOCKED) + if (lock->state == _UNLOCKED) { + GENLOCK_LOG_ERR("Trying to unlock an unlocked handle\n"); goto done; + } /* Make sure this handle is an owner of the lock */ - if (!handle_has_lock(lock, handle)) + if (!handle_has_lock(lock, handle)) { + GENLOCK_LOG_ERR("handle does not have lock attached to it\n"); goto done; - + } /* If the handle holds no more references 
to the lock then release it (maybe) */ @@ -228,7 +288,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, { unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); spin_lock_irqsave(&lock->lock, irqflags); @@ -236,8 +296,8 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * succeed to not block, the mere idea is too dangerous to continue */ - if (in_interrupt() && !(flags & GENLOCK_NOBLOCK)) - BUG(); +// if (in_interrupt() && !(flags & GENLOCK_NOBLOCK)) +// BUG(); /* Fast path - the lock is unlocked, so go do the needful */ @@ -247,12 +307,15 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, if (handle_has_lock(lock, handle)) { /* - * If the handle already holds the lock and the type matches, - * then just increment the active pointer. This allows the - * handle to do recursive locks + * If the handle already holds the lock and the lock type is + * a read lock then just increment the active pointer. This + * allows the handle to do recursive read locks. Recursive + * write locks are not allowed in order to support + * synchronization within a process using a single gralloc + * handle. */ - if (lock->state == op) { + if (lock->state == _RDLOCK && op == _RDLOCK) { handle->active++; goto done; } @@ -261,32 +324,46 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * If the handle holds a write lock then the owner can switch * to a read lock if they want. Do the transition atomically * then wake up any pending waiters in case they want a read - * lock too. + * lock too. In order to support synchronization within a + * process the caller must explicity request to convert the + * lock type with the GENLOCK_WRITE_TO_READ flag. */ - if (op == _RDLOCK && handle->active == 1) { - lock->state = _RDLOCK; - wake_up(&lock->queue); + if (flags & GENLOCK_WRITE_TO_READ) { + if (lock->state == _WRLOCK && op == _RDLOCK) { + lock->state = _RDLOCK; + wake_up(&lock->queue); + goto done; + } else { + GENLOCK_LOG_ERR("Invalid state to convert" + "write to read\n"); + ret = -EINVAL; + goto done; + } + } + } else { + + /* + * Check to ensure the caller has not attempted to convert a + * write to a read without holding the lock. + */ + + if (flags & GENLOCK_WRITE_TO_READ) { + GENLOCK_LOG_ERR("Handle must have lock to convert" + "write to read\n"); + ret = -EINVAL; goto done; } /* - * Otherwise the user tried to turn a read into a write, and we - * don't allow that. 
+ * If we request a read and the lock is held by a read, then go + * ahead and share the lock */ - ret = -EINVAL; - goto done; + if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) + goto dolock; } - /* - * If we request a read and the lock is held by a read, then go - * ahead and share the lock - */ - - if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) - goto dolock; - /* Treat timeout 0 just like a NOBLOCK flag and return if the lock cannot be aquired without blocking */ @@ -295,15 +372,26 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - /* Wait while the lock remains in an incompatible state */ + /* + * Wait while the lock remains in an incompatible state + * state op wait + * ------------------- + * unlocked n/a no + * read read no + * read write yes + * write n/a yes + */ - while (lock->state != _UNLOCKED) { - unsigned int elapsed; + while ((lock->state == _RDLOCK && op == _WRLOCK) || + lock->state == _WRLOCK) { + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); elapsed = wait_event_interruptible_timeout(lock->queue, - lock->state == _UNLOCKED, ticks); + lock->state == _UNLOCKED || + (lock->state == _RDLOCK && op == _RDLOCK), + ticks); spin_lock_irqsave(&lock->lock, irqflags); @@ -312,7 +400,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } dolock: @@ -320,7 +408,7 @@ dolock: list_add_tail(&handle->entry, &lock->active); lock->state = op; - handle->active = 1; + handle->active++; done: spin_unlock_irqrestore(&lock->lock, irqflags); @@ -329,7 +417,7 @@ done: } /** - * genlock_lock - Acquire or release a lock + * genlock_lock - Acquire or release a lock (depreciated) * @handle - pointer to the genlock handle that is requesting the lock * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) * @flags - flags to control the operation @@ -341,11 +429,76 @@ done: int genlock_lock(struct genlock_handle *handle, int op, int flags, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; + unsigned long irqflags; + int ret = 0; - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } + + switch (op) { + case GENLOCK_UNLOCK: + ret = _genlock_unlock(lock, handle); + break; + case GENLOCK_RDLOCK: + spin_lock_irqsave(&lock->lock, irqflags); + if (handle_has_lock(lock, handle)) { + /* request the WRITE_TO_READ flag for compatibility */ + flags |= GENLOCK_WRITE_TO_READ; + } + spin_unlock_irqrestore(&lock->lock, irqflags); + /* fall through to take lock */ + case GENLOCK_WRLOCK: + ret = _genlock_lock(lock, handle, op, flags, timeout); + break; + default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); + ret = -EINVAL; + break; + } + + return ret; +} +EXPORT_SYMBOL(genlock_lock); + +/** + * genlock_dreadlock - Acquire or release a lock + * @handle - pointer to the genlock handle that is requesting the lock + * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) + * @flags - flags to control the operation + * @timeout - optional timeout to wait for the lock to come free + * + * Returns: 0 on success or error code on failure + */ + +int genlock_dreadlock(struct genlock_handle *handle, int op, int flags, + uint32_t timeout) +{ + struct genlock *lock; + + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) { + 
GENLOCK_LOG_ERR("Invalid handle\n"); + return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } switch (op) { case GENLOCK_UNLOCK: @@ -356,13 +509,14 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, ret = _genlock_lock(lock, handle, op, flags, timeout); break; default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); ret = -EINVAL; break; } return ret; } -EXPORT_SYMBOL(genlock_lock); +EXPORT_SYMBOL(genlock_dreadlock); /** * genlock_wait - Wait for the lock to be released @@ -372,13 +526,22 @@ EXPORT_SYMBOL(genlock_lock); int genlock_wait(struct genlock_handle *handle, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } spin_lock_irqsave(&lock->lock, irqflags); @@ -393,7 +556,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) } while (lock->state != _UNLOCKED) { - unsigned int elapsed; + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); @@ -407,7 +570,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) break; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } done: @@ -415,12 +578,7 @@ done: return ret; } -/** - * genlock_release_lock - Release a lock attached to a handle - * @handle - Pointer to the handle holding the lock - */ - -void genlock_release_lock(struct genlock_handle *handle) +static void genlock_release_lock(struct genlock_handle *handle) { unsigned long flags; @@ -441,7 +599,6 @@ void genlock_release_lock(struct genlock_handle *handle) handle->lock = NULL; handle->active = 0; } -EXPORT_SYMBOL(genlock_release_lock); /* * Release function called when all references to a handle are released @@ -468,8 +625,10 @@ static const struct file_operations genlock_handle_fops = { static struct genlock_handle *_genlock_get_handle(void) { struct genlock_handle *handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) + if (handle == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for the handle\n"); return ERR_PTR(-ENOMEM); + } return handle; } @@ -531,6 +690,9 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, struct genlock *lock; int ret; + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + switch (cmd) { case GENLOCK_IOC_NEW: { lock = genlock_create_lock(handle); @@ -540,8 +702,11 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return 0; } case GENLOCK_IOC_EXPORT: { - if (handle->lock == NULL) + if (handle->lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock" + "attached\n"); return -EINVAL; + } ret = genlock_get_fd(handle->lock); if (ret < 0) @@ -574,6 +739,14 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_lock(handle, param.op, param.flags, param.timeout); } + case GENLOCK_IOC_DREADLOCK: { + if (copy_from_user(¶m, (void __user *) arg, + sizeof(param))) + return -EFAULT; + + return genlock_dreadlock(handle, param.op, param.flags, + param.timeout); + } case GENLOCK_IOC_WAIT: { if (copy_from_user(¶m, (void __user *) arg, sizeof(param))) @@ -582,10 +755,16 @@ static long genlock_dev_ioctl(struct file 
*filep, unsigned int cmd, return genlock_wait(handle, param.timeout); } case GENLOCK_IOC_RELEASE: { - genlock_release_lock(handle); - return 0; + /* + * Return error - this ioctl has been deprecated. + * Locks should only be released when the handle is + * destroyed + */ + GENLOCK_LOG_ERR("Deprecated RELEASE ioctl called\n"); + return -EINVAL; } default: + GENLOCK_LOG_ERR("Invalid ioctl\n"); return -EINVAL; } } diff --git a/include/linux/genlock.h b/include/linux/genlock.h old mode 100644 new mode 100755 index 2e9f9d68..587c49df --- a/include/linux/genlock.h +++ b/include/linux/genlock.h @@ -12,7 +12,7 @@ void genlock_put_handle(struct genlock_handle *handle); struct genlock *genlock_create_lock(struct genlock_handle *); struct genlock *genlock_attach_lock(struct genlock_handle *, int fd); int genlock_wait(struct genlock_handle *handle, u32 timeout); -void genlock_release_lock(struct genlock_handle *); +/* genlock_release_lock was deprecated */ int genlock_lock(struct genlock_handle *handle, int op, int flags, u32 timeout); #endif @@ -21,7 +21,8 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, #define GENLOCK_WRLOCK 1 #define GENLOCK_RDLOCK 2 -#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_WRITE_TO_READ (1 << 1) struct genlock_lock { int fd; @@ -37,9 +38,15 @@ struct genlock_lock { struct genlock_lock) #define GENLOCK_IOC_ATTACH _IOW(GENLOCK_IOC_MAGIC, 2, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_LOCK _IOW(GENLOCK_IOC_MAGIC, 3, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_RELEASE _IO(GENLOCK_IOC_MAGIC, 4) #define GENLOCK_IOC_WAIT _IOW(GENLOCK_IOC_MAGIC, 5, \ struct genlock_lock) +#define GENLOCK_IOC_DREADLOCK _IOW(GENLOCK_IOC_MAGIC, 6, \ + struct genlock_lock) #endif From 11a56d2216439215f6acdbf7a41f19b8a6d4dd57 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Tue, 7 Aug 2012 00:53:53 +0800 Subject: [PATCH 082/155] add in_interrupt back --- drivers/base/genlock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c index a89ee7ee..b5f8e42e 100755 --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -22,6 +22,7 @@ #include #include #include +#include /* Lock states - can either be unlocked, held as an exclusive write lock or a * shared read lock @@ -296,8 +297,8 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * succeed to not block, the mere idea is too dangerous to continue */ -// if (in_interrupt() && !(flags & GENLOCK_NOBLOCK)) -// BUG(); + if (in_interrupt() && !(flags & GENLOCK_NOBLOCK)) + BUG(); /* Fast path - the lock is unlocked, so go do the needful */ From 310719402884355d73d2ce5cc8a4bcab7ee821f1 Mon Sep 17 00:00:00 2001 From: securecrt Date: Tue, 7 Aug 2012 16:52:14 +0800 Subject: [PATCH 083/155] fixed typo in config file --- arch/arm/configs/htcleo_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 96d43e38..86053034 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -709,7 +709,7 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -ONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set From e20f7d7077255a4a2b250d3d3b25c8101a48526b Mon Sep 17 00:00:00 2001 From: securecrt Date: 
Tue, 14 Aug 2012 13:45:36 +0800 Subject: [PATCH 084/155] base: genlock: Remove genlock_release_lock and associated ioctl base: genlock: allow synchronization with a single gralloc handle base: genlock: remove BIT macro usage --- drivers/base/genlock.c | 302 ++++++++++++++++++++++++++++++++-------- include/linux/genlock.h | 11 +- 2 files changed, 250 insertions(+), 63 deletions(-) mode change 100644 => 100755 drivers/base/genlock.c mode change 100644 => 100755 include/linux/genlock.h diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c old mode 100644 new mode 100755 index afe8eb1c..b5f8e42e --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,7 +22,7 @@ #include #include #include -#include /* for in_interrupt() */ +#include /* Lock states - can either be unlocked, held as an exclusive write lock or a * shared read lock @@ -32,6 +32,9 @@ #define _RDLOCK GENLOCK_RDLOCK #define _WRLOCK GENLOCK_WRLOCK +#define GENLOCK_LOG_ERR(fmt, args...) \ +pr_err("genlock: %s: " fmt, __func__, ##args) + struct genlock { struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ @@ -49,12 +52,29 @@ struct genlock_handle { taken */ }; +/* + * Create a spinlock to protect against a race condition when a lock gets + * released while another process tries to attach it + */ + +static DEFINE_SPINLOCK(genlock_file_lock); + static void genlock_destroy(struct kref *kref) { - struct genlock *lock = container_of(kref, struct genlock, - refcount); + struct genlock *lock = container_of(kref, struct genlock, + refcount); - kfree(lock); + /* + * Clear the private data for the file descriptor in case the fd is + * still active after the lock gets released + */ + + spin_lock(&genlock_file_lock); + if (lock->file) + lock->file->private_data = NULL; + spin_unlock(&genlock_file_lock); + + kfree(lock); } /* @@ -64,6 +84,15 @@ static void genlock_destroy(struct kref *kref) static int genlock_release(struct inode *inodep, struct file *file) { + struct genlock *lock = file->private_data; + /* + * Clear the refrence back to this file structure to avoid + * somehow reusing the lock after the file has been destroyed + */ + + if (lock) + lock->file = NULL; + return 0; } @@ -82,12 +111,21 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } lock = kzalloc(sizeof(*lock), GFP_KERNEL); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for a lock\n"); return ERR_PTR(-ENOMEM); + } INIT_LIST_HEAD(&lock->active); init_waitqueue_head(&lock->queue); @@ -120,8 +158,10 @@ static int genlock_get_fd(struct genlock *lock) { int ret; - if (!lock->file) + if (!lock->file) { + GENLOCK_LOG_ERR("No file attached to the lock\n"); return -EINVAL; + } ret = get_unused_fd_flags(0); if (ret < 0) @@ -143,19 +183,37 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) struct file *file; struct genlock *lock; - if (handle->lock != NULL) + if 
(IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } file = fget(fd); - if (file == NULL) + if (file == NULL) { + GENLOCK_LOG_ERR("Bad file descriptor\n"); return ERR_PTR(-EBADF); + } + /* + * take a spinlock to avoid a race condition if the lock is + * released and then attached + */ + + spin_lock(&genlock_file_lock); lock = file->private_data; + spin_unlock(&genlock_file_lock); fput(file); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("File descriptor is invalid\n"); return ERR_PTR(-EINVAL); + } handle->lock = lock; kref_get(&lock->refcount); @@ -199,13 +257,16 @@ static int _genlock_unlock(struct genlock *lock, struct genlock_handle *handle) spin_lock_irqsave(&lock->lock, irqflags); - if (lock->state == _UNLOCKED) + if (lock->state == _UNLOCKED) { + GENLOCK_LOG_ERR("Trying to unlock an unlocked handle\n"); goto done; + } /* Make sure this handle is an owner of the lock */ - if (!handle_has_lock(lock, handle)) + if (!handle_has_lock(lock, handle)) { + GENLOCK_LOG_ERR("handle does not have lock attached to it\n"); goto done; - + } /* If the handle holds no more references to the lock then release it (maybe) */ @@ -228,7 +289,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, { unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); spin_lock_irqsave(&lock->lock, irqflags); @@ -247,12 +308,15 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, if (handle_has_lock(lock, handle)) { /* - * If the handle already holds the lock and the type matches, - * then just increment the active pointer. This allows the - * handle to do recursive locks + * If the handle already holds the lock and the lock type is + * a read lock then just increment the active pointer. This + * allows the handle to do recursive read locks. Recursive + * write locks are not allowed in order to support + * synchronization within a process using a single gralloc + * handle. */ - if (lock->state == op) { + if (lock->state == _RDLOCK && op == _RDLOCK) { handle->active++; goto done; } @@ -261,32 +325,46 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * If the handle holds a write lock then the owner can switch * to a read lock if they want. Do the transition atomically * then wake up any pending waiters in case they want a read - * lock too. + * lock too. In order to support synchronization within a + * process the caller must explicity request to convert the + * lock type with the GENLOCK_WRITE_TO_READ flag. */ - if (op == _RDLOCK && handle->active == 1) { - lock->state = _RDLOCK; - wake_up(&lock->queue); + if (flags & GENLOCK_WRITE_TO_READ) { + if (lock->state == _WRLOCK && op == _RDLOCK) { + lock->state = _RDLOCK; + wake_up(&lock->queue); + goto done; + } else { + GENLOCK_LOG_ERR("Invalid state to convert" + "write to read\n"); + ret = -EINVAL; + goto done; + } + } + } else { + + /* + * Check to ensure the caller has not attempted to convert a + * write to a read without holding the lock. + */ + + if (flags & GENLOCK_WRITE_TO_READ) { + GENLOCK_LOG_ERR("Handle must have lock to convert" + "write to read\n"); + ret = -EINVAL; goto done; } /* - * Otherwise the user tried to turn a read into a write, and we - * don't allow that. 
+ * If we request a read and the lock is held by a read, then go + * ahead and share the lock */ - ret = -EINVAL; - goto done; + if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) + goto dolock; } - /* - * If we request a read and the lock is held by a read, then go - * ahead and share the lock - */ - - if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) - goto dolock; - /* Treat timeout 0 just like a NOBLOCK flag and return if the lock cannot be aquired without blocking */ @@ -295,15 +373,26 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - /* Wait while the lock remains in an incompatible state */ + /* + * Wait while the lock remains in an incompatible state + * state op wait + * ------------------- + * unlocked n/a no + * read read no + * read write yes + * write n/a yes + */ - while (lock->state != _UNLOCKED) { - unsigned int elapsed; + while ((lock->state == _RDLOCK && op == _WRLOCK) || + lock->state == _WRLOCK) { + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); elapsed = wait_event_interruptible_timeout(lock->queue, - lock->state == _UNLOCKED, ticks); + lock->state == _UNLOCKED || + (lock->state == _RDLOCK && op == _RDLOCK), + ticks); spin_lock_irqsave(&lock->lock, irqflags); @@ -312,7 +401,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } dolock: @@ -320,7 +409,7 @@ dolock: list_add_tail(&handle->entry, &lock->active); lock->state = op; - handle->active = 1; + handle->active++; done: spin_unlock_irqrestore(&lock->lock, irqflags); @@ -329,7 +418,7 @@ done: } /** - * genlock_lock - Acquire or release a lock + * genlock_lock - Acquire or release a lock (depreciated) * @handle - pointer to the genlock handle that is requesting the lock * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) * @flags - flags to control the operation @@ -341,11 +430,76 @@ done: int genlock_lock(struct genlock_handle *handle, int op, int flags, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; + unsigned long irqflags; + int ret = 0; - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } + + switch (op) { + case GENLOCK_UNLOCK: + ret = _genlock_unlock(lock, handle); + break; + case GENLOCK_RDLOCK: + spin_lock_irqsave(&lock->lock, irqflags); + if (handle_has_lock(lock, handle)) { + /* request the WRITE_TO_READ flag for compatibility */ + flags |= GENLOCK_WRITE_TO_READ; + } + spin_unlock_irqrestore(&lock->lock, irqflags); + /* fall through to take lock */ + case GENLOCK_WRLOCK: + ret = _genlock_lock(lock, handle, op, flags, timeout); + break; + default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); + ret = -EINVAL; + break; + } + + return ret; +} +EXPORT_SYMBOL(genlock_lock); + +/** + * genlock_dreadlock - Acquire or release a lock + * @handle - pointer to the genlock handle that is requesting the lock + * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) + * @flags - flags to control the operation + * @timeout - optional timeout to wait for the lock to come free + * + * Returns: 0 on success or error code on failure + */ + +int genlock_dreadlock(struct genlock_handle *handle, int op, int flags, + uint32_t timeout) +{ + struct genlock *lock; + + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) { + 
GENLOCK_LOG_ERR("Invalid handle\n"); + return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } switch (op) { case GENLOCK_UNLOCK: @@ -356,13 +510,14 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, ret = _genlock_lock(lock, handle, op, flags, timeout); break; default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); ret = -EINVAL; break; } return ret; } -EXPORT_SYMBOL(genlock_lock); +EXPORT_SYMBOL(genlock_dreadlock); /** * genlock_wait - Wait for the lock to be released @@ -372,13 +527,22 @@ EXPORT_SYMBOL(genlock_lock); int genlock_wait(struct genlock_handle *handle, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } spin_lock_irqsave(&lock->lock, irqflags); @@ -393,7 +557,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) } while (lock->state != _UNLOCKED) { - unsigned int elapsed; + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); @@ -407,7 +571,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) break; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } done: @@ -415,12 +579,7 @@ done: return ret; } -/** - * genlock_release_lock - Release a lock attached to a handle - * @handle - Pointer to the handle holding the lock - */ - -void genlock_release_lock(struct genlock_handle *handle) +static void genlock_release_lock(struct genlock_handle *handle) { unsigned long flags; @@ -441,7 +600,6 @@ void genlock_release_lock(struct genlock_handle *handle) handle->lock = NULL; handle->active = 0; } -EXPORT_SYMBOL(genlock_release_lock); /* * Release function called when all references to a handle are released @@ -468,8 +626,10 @@ static const struct file_operations genlock_handle_fops = { static struct genlock_handle *_genlock_get_handle(void) { struct genlock_handle *handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) + if (handle == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for the handle\n"); return ERR_PTR(-ENOMEM); + } return handle; } @@ -531,6 +691,9 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, struct genlock *lock; int ret; + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + switch (cmd) { case GENLOCK_IOC_NEW: { lock = genlock_create_lock(handle); @@ -540,8 +703,11 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return 0; } case GENLOCK_IOC_EXPORT: { - if (handle->lock == NULL) + if (handle->lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock" + "attached\n"); return -EINVAL; + } ret = genlock_get_fd(handle->lock); if (ret < 0) @@ -574,6 +740,14 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_lock(handle, param.op, param.flags, param.timeout); } + case GENLOCK_IOC_DREADLOCK: { + if (copy_from_user(¶m, (void __user *) arg, + sizeof(param))) + return -EFAULT; + + return genlock_dreadlock(handle, param.op, param.flags, + param.timeout); + } case GENLOCK_IOC_WAIT: { if (copy_from_user(¶m, (void __user *) arg, sizeof(param))) @@ -582,10 +756,16 @@ static long genlock_dev_ioctl(struct file 
*filep, unsigned int cmd, return genlock_wait(handle, param.timeout); } case GENLOCK_IOC_RELEASE: { - genlock_release_lock(handle); - return 0; + /* + * Return error - this ioctl has been deprecated. + * Locks should only be released when the handle is + * destroyed + */ + GENLOCK_LOG_ERR("Deprecated RELEASE ioctl called\n"); + return -EINVAL; } default: + GENLOCK_LOG_ERR("Invalid ioctl\n"); return -EINVAL; } } diff --git a/include/linux/genlock.h b/include/linux/genlock.h old mode 100644 new mode 100755 index 2e9f9d68..587c49df --- a/include/linux/genlock.h +++ b/include/linux/genlock.h @@ -12,7 +12,7 @@ void genlock_put_handle(struct genlock_handle *handle); struct genlock *genlock_create_lock(struct genlock_handle *); struct genlock *genlock_attach_lock(struct genlock_handle *, int fd); int genlock_wait(struct genlock_handle *handle, u32 timeout); -void genlock_release_lock(struct genlock_handle *); +/* genlock_release_lock was deprecated */ int genlock_lock(struct genlock_handle *handle, int op, int flags, u32 timeout); #endif @@ -21,7 +21,8 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, #define GENLOCK_WRLOCK 1 #define GENLOCK_RDLOCK 2 -#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_WRITE_TO_READ (1 << 1) struct genlock_lock { int fd; @@ -37,9 +38,15 @@ struct genlock_lock { struct genlock_lock) #define GENLOCK_IOC_ATTACH _IOW(GENLOCK_IOC_MAGIC, 2, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_LOCK _IOW(GENLOCK_IOC_MAGIC, 3, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_RELEASE _IO(GENLOCK_IOC_MAGIC, 4) #define GENLOCK_IOC_WAIT _IOW(GENLOCK_IOC_MAGIC, 5, \ struct genlock_lock) +#define GENLOCK_IOC_DREADLOCK _IOW(GENLOCK_IOC_MAGIC, 6, \ + struct genlock_lock) #endif From 9b8229cafbe29f53a31c5c646f3839259a5dfd17 Mon Sep 17 00:00:00 2001 From: securecrt Date: Tue, 14 Aug 2012 16:48:25 +0800 Subject: [PATCH 085/155] Staging: android: binder: Don't call dump_stack in binder_vma_open Staging: android: binder: Fix crashes when sharing a binder file between processes drivers:staging:android Typos: fix some comments that have typos in them. fs: Remove missed ->fds_bits from cessation use of fd_set structs internally Staging:android: Change type for binder_debug_no_lock switch to bool Staging: android: binder: Fix use-after-free bug --- drivers/staging/android/binder.c | 270 ++++++++++++++----------------- 1 file changed, 120 insertions(+), 150 deletions(-) mode change 100644 => 100755 drivers/staging/android/binder.c diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c old mode 100644 new mode 100755 index a0763da9..c44eb407 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -3,7 +3,6 @@ * Android IPC Subsystem * * Copyright (C) 2007-2008 Google, Inc. - * Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
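Usage note for the genlock ioctl interface above: a client that wants the corrected semantics calls the new GENLOCK_IOC_DREADLOCK ioctl rather than the deprecated GENLOCK_IOC_LOCK, and releases by closing the handle instead of GENLOCK_IOC_RELEASE. A hedged userspace sketch, assuming the exported genlock.h and the usual genlock_lock fields (op, flags, timeout); the helper name is illustrative only:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/genlock.h>

/* Take a read lock on an already-attached genlock handle fd. */
static int genlock_read_lock(int handle_fd, unsigned int timeout_ms)
{
	struct genlock_lock param = {
		.op = GENLOCK_RDLOCK,
		.flags = 0,		/* or GENLOCK_WRITE_TO_READ to demote a held write lock */
		.timeout = timeout_ms,
	};

	return ioctl(handle_fd, GENLOCK_IOC_DREADLOCK, &param);
}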
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -31,14 +30,15 @@ #include #include #include -#include #include #include +#include #include "binder.h" static DEFINE_MUTEX(binder_lock); static DEFINE_MUTEX(binder_deferred_lock); +static DEFINE_MUTEX(binder_mmap_lock); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); @@ -98,12 +98,12 @@ enum { BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, BINDER_DEBUG_PRIORITY_CAP = 1U << 14, BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, - BINDER_DEBUG_TOP_ERRORS = 1U << 16, + BINDER_DEBUG_TOP_ERRORS = 1U << 16, }; static uint32_t binder_debug_mask; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static int binder_debug_no_lock; +static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -258,7 +258,7 @@ struct binder_ref { }; struct binder_buffer { - struct list_head entry; /* free and allocated entries by addesss */ + struct list_head entry; /* free and allocated entries by address */ struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */ unsigned free:1; @@ -288,6 +288,7 @@ struct binder_proc { struct rb_root refs_by_node; int pid; struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; @@ -380,8 +381,7 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); + fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -633,6 +633,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; + if (vma && mm != proc->vma_vm_mm) { + pr_err("binder: %d: vma mm and task mm mismatch\n", + proc->pid); + vma = NULL; + } } if (allocate == 0) @@ -640,8 +645,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (vma == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed to " - "map pages in userspace, no vma\n", proc->pid); + "binder: %d: binder_alloc_buf failed to " + "map pages in userspace, no vma\n", proc->pid); goto err_no_vma; } @@ -654,8 +659,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, *page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (*page == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "for page at %p\n", proc->pid, page_addr); + "binder: %d: binder_alloc_buf failed " + "for page at %p\n", proc->pid, page_addr); goto err_alloc_page_failed; } tmp_area.addr = page_addr; @@ -664,9 +669,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); if (ret) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "to map page at %p in kernel\n", - proc->pid, page_addr); + "binder: %d: binder_alloc_buf failed " + "to map page at %p in kernel\n", + proc->pid, page_addr); goto err_map_kernel_failed; } user_page_addr = @@ -674,9 +679,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, ret = vm_insert_page(vma, user_page_addr, page[0]); if (ret) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "to map page at %lx in userspace\n", - proc->pid, user_page_addr); + "binder: %d: binder_alloc_buf failed " + "to map page at %lx in userspace\n", + proc->pid, user_page_addr); goto err_vm_insert_page_failed; } /* vm_insert_page does not seem to increment the refcount */ @@ -724,8 +729,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, if (proc->vma == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf, no vma\n", - proc->pid); + "binder: %d: binder_alloc_buf, no vma\n", + proc->pid); return NULL; } @@ -763,8 +768,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, } if (best_fit == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf size %zd failed, " - "no address space\n", proc->pid, size); + "binder: %d: binder_alloc_buf size %zd failed, " + "no address space\n", proc->pid, size); return NULL; } if (n == NULL) { @@ -999,8 +1004,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, !(node == binder_context_mgr_node && node->has_strong_ref)) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: invalid inc strong " - "node for %d\n", node->debug_id); + "binder: invalid inc strong " + "node for %d\n", node->debug_id); return -EINVAL; } node->internal_strong_refs++; @@ -1016,8 +1021,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, if (!node->has_weak_ref && list_empty(&node->work.entry)) { if (target_list == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: invalid inc weak node " - "for %d\n", node->debug_id); + "binder: invalid inc weak node " + "for %d\n", node->debug_id); return -EINVAL; } 
list_add_tail(&node->work.entry, target_list); @@ -1053,7 +1058,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) if (node->proc) { rb_erase(&node->rb_node, &node->proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "binder: refless node %d deleted\n", + "binder: refless node %d deleted\n", node->debug_id); } else { hlist_del(&node->dead_node); @@ -1272,8 +1277,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "binder: send failed reply for " "transaction %d to %d:%d\n", - t->debug_id, - target_thread->proc->pid, + t->debug_id, target_thread->proc->pid, target_thread->pid); binder_pop_transaction(target_thread, t); @@ -1281,11 +1285,12 @@ static void binder_send_failed_reply(struct binder_transaction *t, wake_up_interruptible(&target_thread->wait); } else { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: reply failed, target " - "thread, %d:%d, has error code %d " - "already\n", target_thread->proc->pid, - target_thread->pid, - target_thread->return_error); + "binder: reply failed, target " + "thread, %d:%d, has error code %d " + "already\n", + target_thread->proc->pid, + target_thread->pid, + target_thread->return_error); } return; } else { @@ -1319,15 +1324,14 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, - "binder: %d buffer release %d, size %zd-%zd, failed at" - " %p\n", proc->pid, buffer->debug_id, + "binder: %d buffer release %d, size %zd-%zd, failed at %p\n", + proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, failed_at); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); - offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, - sizeof(void *))); + offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, sizeof(void *))); if (failed_at) off_end = failed_at; else @@ -1338,44 +1342,41 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, buffer->data_size < sizeof(*fp) || !IS_ALIGNED(*offp, sizeof(void *))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad" - "offset %zd, size %zd\n", debug_id, - *offp, buffer->data_size); + "binder: transaction release %d bad" + "offset %zd, size %zd\n", debug_id, + *offp, buffer->data_size); continue; } fp = (struct flat_binder_object *)(buffer->data + *offp); switch (fp->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_node *node = binder_get_node(proc, - fp->binder); + struct binder_node *node = binder_get_node(proc, fp->binder); if (node == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d" - " bad node %p\n", debug_id, fp->binder); + "binder: transaction release %d" + " bad node %p\n", debug_id, + fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%p\n", node->debug_id, node->ptr); - binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, - 0); + binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, - fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle); if (ref == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d" - " bad handle %ld\n", debug_id, - fp->handle); + "binder: transaction release %d" + " bad handle %ld\n", debug_id, + fp->handle); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc 
%d (node %d)\n", - ref->debug_id, ref->desc, - ref->node->debug_id); + ref->debug_id, ref->desc, ref->node->debug_id); binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE); } break; @@ -1388,8 +1389,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, default: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad " - "object type %lx\n", debug_id, fp->type); + "binder: transaction release %d bad " + "object type %lx\n", debug_id, fp->type); break; } } @@ -1614,19 +1615,15 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct binder_ref *ref; - struct binder_node *node = binder_get_node(proc, - fp->binder); + struct binder_node *node = binder_get_node(proc, fp->binder); if (node == NULL) { - node = binder_new_node(proc, fp->binder, - fp->cookie); + node = binder_new_node(proc, fp->binder, fp->cookie); if (node == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_new_node_failed; } - node->min_priority = fp->flags & - FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & - FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); } if (fp->cookie != node->cookie) { binder_user_error("binder: %d:%d sending u%p " @@ -1656,8 +1653,7 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, - fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle); if (ref == NULL) { binder_user_error("binder: %d:%d got " "transaction with invalid " @@ -1673,31 +1669,24 @@ static void binder_transaction(struct binder_proc *proc, fp->type = BINDER_TYPE_WEAK_BINDER; fp->binder = ref->node->ptr; fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->type == - BINDER_TYPE_BINDER, 0, NULL); + binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> node %d u%p\n", - ref->debug_id, ref->desc, - ref->node->debug_id, - ref->node->ptr); + " ref %d desc %d -> node %d u%p\n", + ref->debug_id, ref->desc, ref->node->debug_id, + ref->node->ptr); } else { struct binder_ref *new_ref; - new_ref = binder_get_ref_for_node(target_proc, - ref->node); + new_ref = binder_get_ref_for_node(target_proc, ref->node); if (new_ref == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } fp->handle = new_ref->desc; - binder_inc_ref(new_ref, fp->type == - BINDER_TYPE_HANDLE, NULL); + binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> ref %d" - " desc %d (node %d)\n", - ref->debug_id, ref->desc, - new_ref->debug_id, - new_ref->desc, - ref->node->debug_id); + " ref %d desc %d -> ref %d desc %d (node %d)\n", + ref->debug_id, ref->desc, new_ref->debug_id, + new_ref->desc, ref->node->debug_id); } } break; @@ -1707,19 +1696,13 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { - binder_user_error("binder: %d:%d got" - " reply with fd, %ld, but" - " target does not allow fds\n", - proc->pid, thread->pid, - fp->handle); + binder_user_error("binder: %d:%d got reply with fd, %ld, but target does not allow fds\n", + proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; } } else if 
(!target_node->accept_fds) { - binder_user_error( - "binder: %d:%d got transaction" - " with fd, %ld, but target does" - " not allow fds\n", + binder_user_error("binder: %d:%d got transaction with fd, %ld, but target does not allow fds\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; @@ -1727,15 +1710,12 @@ static void binder_transaction(struct binder_proc *proc, file = fget(fp->handle); if (file == NULL) { - binder_user_error( - "binder: %d:%d got transaction" - " with invalid fd, %ld\n", + binder_user_error("binder: %d:%d got transaction with invalid fd, %ld\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fget_failed; } - target_fd = task_get_unused_fd_flags(target_proc, - O_CLOEXEC); + target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); if (target_fd < 0) { fput(file); return_error = BR_FAILED_REPLY; @@ -1743,8 +1723,7 @@ static void binder_transaction(struct binder_proc *proc, } task_fd_install(target_proc, target_fd, file); binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %ld -> %d\n", fp->handle, - target_fd); + " fd %ld -> %d\n", fp->handle, target_fd); /* TODO: fput? */ fp->handle = target_fd; } break; @@ -1893,11 +1872,9 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_USER_REFS, - "binder: %d:%d %s ref %d desc %d s %d w %d" - " for node %d\n", proc->pid, thread->pid, - debug_string, ref->debug_id, ref->desc, - ref->strong, ref->weak, - ref->node->debug_id); + "binder: %d:%d %s ref %d desc %d s %d w %d for node %d\n", + proc->pid, thread->pid, debug_string, ref->debug_id, + ref->desc, ref->strong, ref->weak, ref->node->debug_id); break; } case BC_INCREFS_DONE: @@ -1958,19 +1935,17 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, binder_debug(BINDER_DEBUG_USER_REFS, "binder: %d:%d %s node %d ls %d lw %d\n", proc->pid, thread->pid, - cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" - : "BC_ACQUIRE_DONE", - node->debug_id, node->local_strong_refs, - node->local_weak_refs); + cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", + node->debug_id, node->local_strong_refs, node->local_weak_refs); break; } case BC_ATTEMPT_ACQUIRE: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BC_ATTEMPT_ACQUIRE not supported\n"); + "binder: BC_ATTEMPT_ACQUIRE not supported\n"); return -EINVAL; case BC_ACQUIRE_RESULT: - binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BC_ACQUIRE_RESULT not supported\n"); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BC_ACQUIRE_RESULT not supported\n"); return -EINVAL; case BC_FREE_BUFFER: { @@ -1996,11 +1971,9 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, - "binder: %d:%d BC_FREE_BUFFER u%p found" - " buffer %d for %s transaction\n", - proc->pid, thread->pid, data_ptr, - buffer->debug_id, buffer->transaction ? - "active" : "finished"); + "binder: %d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n", + proc->pid, thread->pid, data_ptr, buffer->debug_id, + buffer->transaction ? 
"active" : "finished"); if (buffer->transaction) { buffer->transaction->buffer = NULL; @@ -2097,15 +2070,13 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, } binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, - "binder: %d:%d %s %p ref %d desc %d s %d" - " w %d for node %d\n", + "binder: %d:%d %s %p ref %d desc %d s %d w %d for node %d\n", proc->pid, thread->pid, cmd == BC_REQUEST_DEATH_NOTIFICATION ? "BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", cookie, ref->debug_id, ref->desc, - ref->strong, ref->weak, - ref->node->debug_id); + ref->strong, ref->weak, ref->node->debug_id); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { @@ -2119,12 +2090,10 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, death = kzalloc(sizeof(*death), GFP_KERNEL); if (death == NULL) { thread->return_error = BR_ERROR; - binder_debug( - BINDER_DEBUG_FAILED_TRANSACTION, - "binder: %d:%d " - "BC_REQUEST_DEATH_NOTIFICATION" - " failed\n", - proc->pid, thread->pid); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "binder: %d:%d " + "BC_REQUEST_DEATH_NOTIFICATION failed\n", + proc->pid, thread->pid); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -2214,8 +2183,8 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, default: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d unknown command %d\n", - proc->pid, thread->pid, cmd); + "binder: %d:%d unknown command %d\n", + proc->pid, thread->pid, cmd); return -EINVAL; } *consumed = ptr - buffer; @@ -2684,11 +2653,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_ioctl: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg);*/ + /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ - ret = wait_event_interruptible(binder_user_error_wait, - binder_stop_on_user_error < 2); + ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret) return ret; @@ -2745,8 +2712,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; } case BINDER_SET_MAX_THREADS: - if (copy_from_user(&proc->max_threads, ubuf, - sizeof(proc->max_threads))) { + if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { ret = -EINVAL; goto err; } @@ -2754,17 +2720,17 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case BINDER_SET_CONTEXT_MGR: if (binder_context_mgr_node != NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BINDER_SET_CONTEXT_MGR already set\n"); + "binder: BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto err; } if (binder_context_mgr_uid != -1) { if (binder_context_mgr_uid != current->cred->euid) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BINDER_SET_" - "CONTEXT_MGR bad uid %d != %d\n", - current->cred->euid, - binder_context_mgr_uid); + "binder: BINDER_SET_" + "CONTEXT_MGR bad uid %d != %d\n", + current->cred->euid, + binder_context_mgr_uid); ret = -EPERM; goto err; } @@ -2808,8 +2774,8 @@ err: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d ioctl %x %lx returned %d\n", - proc->pid, current->pid, cmd, arg, ret); + "binder: %d:%d ioctl %x %lx returned %d\n", + proc->pid, current->pid, cmd, arg, ret); return 
ret; } @@ -2821,7 +2787,6 @@ static void binder_vma_open(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - dump_stack(); } static void binder_vma_close(struct vm_area_struct *vma) @@ -2833,6 +2798,7 @@ static void binder_vma_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); proc->vma = NULL; + proc->vma_vm_mm = NULL; binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -2865,6 +2831,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + mutex_lock(&binder_mmap_lock); if (proc->buffer) { ret = -EBUSY; failure_string = "already mapped"; @@ -2879,13 +2846,13 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } proc->buffer = area->addr; proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; + mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx maps %p bad alignment\n", - proc->pid, vma->vm_start, vma->vm_end, proc->buffer); + "binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); vma->vm_start += PAGE_SIZE; } } @@ -2913,11 +2880,11 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) binder_insert_free_buffer(proc, buffer); proc->free_async_space = proc->buffer_size / 2; barrier(); - proc->files = get_files_struct(current); + proc->files = get_files_struct(proc->tsk); proc->vma = vma; + proc->vma_vm_mm = vma->vm_mm; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx maps %p\n", + /*printk(KERN_INFO "binder_mmap: %d %lx-%lx maps %p\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/ return 0; @@ -2925,14 +2892,17 @@ err_alloc_small_buf_failed: kfree(proc->pages); proc->pages = NULL; err_alloc_pages_failed: + mutex_lock(&binder_mmap_lock); vfree(proc->buffer); proc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: + mutex_unlock(&binder_mmap_lock); err_bad_arg: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx %s failed %d\n", - proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + "binder_mmap: %d %lx-%lx %s failed %d\n", + proc->pid, vma->vm_start, vma->vm_end, failure_string, + ret); return ret; } @@ -3087,9 +3057,9 @@ static void binder_deferred_release(struct binder_proc *proc) t->buffer = NULL; buffer->transaction = NULL; binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: release proc %d, " - "transaction %d, not freed\n", - proc->pid, t->debug_id); + "binder: release proc %d, " + "transaction %d, not freed\n", + proc->pid, t->debug_id); /*BUG();*/ } binder_free_buf(proc, buffer); From d6b41b0def6ca7bf09bc79255bbc534f80d270be Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 13:37:30 +0800 Subject: [PATCH 086/155] staging: android: lowmemorykiller: sysfs node and notifications android, lowmemorykiller: remove task handoff notifier staging: android: lowmemorykiller: Fix task_struct leak staging: android/lowmemorykiller: Don't unregister notifier from atomic context staging: android, lowmemorykiller: convert to use oom_score_adj staging: android/lowmemorykiller: Do not kill kernel threads staging: android/lowmemorykiller: No need for task->signal check 
staging: android/lowmemorykiller: Better mm handling staging: android/lowmemorykiller: Don't grab tasklist_lock staging: android: lowmemorykiller: Don't wait more than one second for a process to die Staging: android: fixed 80 characters warnings in lowmemorykiller.c staging: android: lowmemorykiller: Ignore shmem pages in page-cache staging: android: lowmemorykiller: Remove bitrotted codepath staging: android: lowmemkiller: Substantially reduce overhead during reclaim staging: android: lowmemorykiller: Don't try to kill the same pid over and over --- drivers/staging/android/lowmemorykiller.c | 201 ++++++++++++++++++---- 1 file changed, 164 insertions(+), 37 deletions(-) mode change 100644 => 100755 drivers/staging/android/lowmemorykiller.c diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c old mode 100644 new mode 100755 index 42cd93ea..05ebece0 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -29,12 +29,17 @@ * */ -#include #include +#include +#include +#include #include +#include +#include #include #include -#include +#include +#include static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -52,8 +57,12 @@ static size_t lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; +static size_t lowmem_minfree_notif_trigger; + +static unsigned int offlining; static struct task_struct *lowmem_deathpending; -static DEFINE_SPINLOCK(lowmem_deathpending_lock); +static unsigned long lowmem_deathpending_timeout; +static struct kobject *lowmem_kobj; #define lowmem_print(level, x...) \ do { \ @@ -68,29 +77,66 @@ static struct notifier_block task_nb = { .notifier_call = task_notify_func, }; - -static void task_free_fn(struct work_struct *work) -{ - unsigned long flags; - - task_free_unregister(&task_nb); - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - lowmem_deathpending = NULL; - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); -} -static DECLARE_WORK(task_free_work, task_free_fn); - static int task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; - if (task == lowmem_deathpending) { - schedule_work(&task_free_work); - } + if (task == lowmem_deathpending) + lowmem_deathpending = NULL; + return NOTIFY_OK; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int lmk_hotplug_callback(struct notifier_block *self, + unsigned long cmd, void *data) +{ + switch (cmd) { + /* Don't care LMK cases */ + case MEM_ONLINE: + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + case MEM_GOING_ONLINE: + offlining = 0; + lowmem_print(4, "lmk in normal mode\n"); + break; + /* LMK should account for movable zone */ + case MEM_GOING_OFFLINE: + offlining = 1; + lowmem_print(4, "lmk in hotplug mode\n"); + break; + } + return NOTIFY_DONE; +} +#endif + + + +static void lowmem_notify_killzone_approach(void); + +static inline void get_free_ram(int *other_free, int *other_file) +{ + struct zone *zone; + *other_free = global_page_state(NR_FREE_PAGES); + *other_file = global_page_state(NR_FILE_PAGES) - + global_page_state(NR_SHMEM); + + if (offlining) { + /* Discount all free space in the section being offlined */ + for_each_zone(zone) { + if (zone_idx(zone) == ZONE_MOVABLE) { + *other_free -= zone_page_state(zone, + NR_FREE_PAGES); + lowmem_print(4, "lowmem_shrink discounted " + "%lu pages in movable zone\n", + zone_page_state(zone, NR_FREE_PAGES)); + } + } + } +} + static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) { struct 
task_struct *p; @@ -102,10 +148,8 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); - int other_free = global_page_state(NR_FREE_PAGES); - int other_file = global_page_state(NR_FILE_PAGES); - unsigned long flags; - + int other_free; + int other_file; /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan @@ -113,15 +157,24 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) * this pass. * */ - if (lowmem_deathpending) + if (lowmem_deathpending && + time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; + get_free_ram(&other_free, &other_file); + + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) { + lowmem_notify_killzone_approach(); + } + if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_file < lowmem_minfree[i]) { + if (other_free < lowmem_minfree[i] && + other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } @@ -176,20 +229,14 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } - if (selected) { - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - if (!lowmem_deathpending) { - lowmem_print(1, - "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); - lowmem_deathpending = selected; - task_free_register(&task_nb); - force_sig(SIGKILL, selected); - rem -= selected_tasksize; - } - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); + lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_adj, selected_tasksize); + lowmem_deathpending = selected; + lowmem_deathpending_timeout = jiffies + HZ; + force_sig(SIGKILL, selected); + rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); @@ -202,15 +249,93 @@ static struct shrinker lowmem_shrinker = { .seeks = DEFAULT_SEEKS * 16 }; +static void lowmem_notify_killzone_approach(void) +{ + lowmem_print(3, "notification trigger activated\n"); + sysfs_notify(lowmem_kobj, NULL, "notify_trigger_active"); +} + +static ssize_t lowmem_notify_trigger_active_show(struct kobject *k, + struct kobj_attribute *attr, char *buf) +{ + int other_free, other_file; + get_free_ram(&other_free, &other_file); + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) + return snprintf(buf, 3, "1\n"); + else + return snprintf(buf, 3, "0\n"); +} + +static struct kobj_attribute lowmem_notify_trigger_active_attr = + __ATTR(notify_trigger_active, S_IRUGO, + lowmem_notify_trigger_active_show, NULL); + +static struct attribute *lowmem_default_attrs[] = { + &lowmem_notify_trigger_active_attr.attr, + NULL, +}; + +static ssize_t lowmem_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(k, kobj_attr, buf); +} + +static const struct sysfs_ops lowmem_ops = { + .show = lowmem_show, +}; + +static void lowmem_kobj_release(struct kobject *kobj) +{ + /* Nothing to be done here */ +} + +static struct kobj_type lowmem_kobj_type = { + .release = lowmem_kobj_release, + .sysfs_ops = &lowmem_ops, + 
.default_attrs = lowmem_default_attrs, +}; + static int __init lowmem_init(void) { + int rc; + task_free_register(&task_nb); register_shrinker(&lowmem_shrinker); +#ifdef CONFIG_MEMORY_HOTPLUG + hotplug_memory_notifier(lmk_hotplug_callback, 0); +#endif + + lowmem_kobj = kzalloc(sizeof(*lowmem_kobj), GFP_KERNEL); + if (!lowmem_kobj) { + rc = -ENOMEM; + goto err; + } + + rc = kobject_init_and_add(lowmem_kobj, &lowmem_kobj_type, + mm_kobj, "lowmemkiller"); + if (rc) + goto err_kobj; + return 0; + +err_kobj: + kfree(lowmem_kobj); + +err: + unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); + + return rc; } static void __exit lowmem_exit(void) { + kobject_put(lowmem_kobj); + kfree(lowmem_kobj); unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); } module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); @@ -219,6 +344,8 @@ module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size, module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); +module_param_named(notify_trigger, lowmem_minfree_notif_trigger, uint, + S_IRUGO | S_IWUSR); module_init(lowmem_init); module_exit(lowmem_exit); From 9c3257d8592f5d9e6f82a59cea4b1ac172288d20 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 14:56:37 +0800 Subject: [PATCH 087/155] android: logger: Allow a UID to read it's own log entries staging: logger: hold mutex while removing reader staging: android: logger: clarify non-update of w_off in do_write_log_from_user staging: android: logger: clarify code in clock_interval staging: android: logger: reorder prepare_to_wait and mutex_lock staging: android: logger: simplify and optimize get_entry_len staging: android: logger: Change logger_offset() from macro to function Staging: android: fixed white spaces coding style issue in logger.c android: logger: bump up the logger buffer sizes --- drivers/staging/android/logger.c | 268 +++++++++++++++++++++++++------ drivers/staging/android/logger.h | 29 +++- include/linux/capability.h | 6 +- 3 files changed, 249 insertions(+), 54 deletions(-) mode change 100644 => 100755 drivers/staging/android/logger.c mode change 100644 => 100755 drivers/staging/android/logger.h mode change 100644 => 100755 include/linux/capability.h diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c old mode 100644 new mode 100755 index 1a0c1391..eb3d4ca5 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -37,7 +37,7 @@ * mutex 'mutex'. */ struct logger_log { - unsigned char *buffer;/* the ring buffer itself */ + unsigned char *buffer;/* the ring buffer itself */ struct miscdevice misc; /* misc device representing the log */ wait_queue_head_t wq; /* wait queue for readers */ struct list_head readers; /* this log's readers */ @@ -57,19 +57,25 @@ struct logger_reader { struct logger_log *log; /* associated log */ struct list_head list; /* entry in logger_log's list */ size_t r_off; /* current read head offset */ + bool r_all; /* reader can read all entries */ + int r_ver; /* reader ABI version */ }; /* logger_offset - returns index 'n' into the log via (optimized) modulus */ -#define logger_offset(n) ((n) & (log->size - 1)) +size_t logger_offset(struct logger_log *log, size_t n) +{ + return n & (log->size-1); +} + /* * file_get_log - Given a file structure, return the associated log * * This isn't aesthetic. 
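Since logger_offset() above depends on the ring size being a power of two, the bitwise AND with (size - 1) is exactly the modulus. A small standalone check; the offsets are arbitrary and the 256 KiB size matches the enlarged buffers later in this patch:

#include <assert.h>
#include <stddef.h>

int main(void)
{
	size_t size = 256 * 1024;	/* must be a power of two */
	size_t offs[] = { 0, 1, size - 1, size, size + 17, 3 * size + 5 };

	for (size_t i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		assert((offs[i] & (size - 1)) == offs[i] % size);

	return 0;
}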
We have several goals: * - * 1) Need to quickly obtain the associated log during an I/O operation - * 2) Readers need to maintain state (logger_reader) - * 3) Writers need to be very fast (open() should be a near no-op) + * 1) Need to quickly obtain the associated log during an I/O operation + * 2) Readers need to maintain state (logger_reader) + * 3) Writers need to be very fast (open() should be a near no-op) * * In the reader case, we can trivially go file->logger_reader->logger_log. * For a writer, we don't want to maintain a logger_reader, so we just go @@ -86,25 +92,75 @@ static inline struct logger_log *file_get_log(struct file *file) } /* - * get_entry_len - Grabs the length of the payload of the next entry starting - * from 'off'. + * get_entry_header - returns a pointer to the logger_entry header within + * 'log' starting at offset 'off'. A temporary logger_entry 'scratch' must + * be provided. Typically the return value will be a pointer within + * 'logger->buf'. However, a pointer to 'scratch' may be returned if + * the log entry spans the end and beginning of the circular buffer. + */ +static struct logger_entry *get_entry_header(struct logger_log *log, + size_t off, struct logger_entry *scratch) +{ + size_t len = min(sizeof(struct logger_entry), log->size - off); + if (len != sizeof(struct logger_entry)) { + memcpy(((void *) scratch), log->buffer + off, len); + memcpy(((void *) scratch) + len, log->buffer, + sizeof(struct logger_entry) - len); + return scratch; + } + + return (struct logger_entry *) (log->buffer + off); +} + +/* + * get_entry_msg_len - Grabs the length of the message of the entry + * starting from from 'off'. + * + * An entry length is 2 bytes (16 bits) in host endian order. + * In the log, the length does not include the size of the log entry structure. + * This function returns the size including the log entry structure. * * Caller needs to hold log->mutex. */ -static __u32 get_entry_len(struct logger_log *log, size_t off) +static __u32 get_entry_msg_len(struct logger_log *log, size_t off) { - __u16 val; + struct logger_entry scratch; + struct logger_entry *entry; - switch (log->size - off) { - case 1: - memcpy(&val, log->buffer + off, 1); - memcpy(((char *) &val) + 1, log->buffer, 1); - break; - default: - memcpy(&val, log->buffer + off, 2); + entry = get_entry_header(log, off, &scratch); + return entry->len; +} + +static size_t get_user_hdr_len(int ver) +{ + if (ver < 2) + return sizeof(struct user_logger_entry_compat); + else + return sizeof(struct logger_entry); +} + +static ssize_t copy_header_to_user(int ver, struct logger_entry *entry, + char __user *buf) +{ + void *hdr; + size_t hdr_len; + struct user_logger_entry_compat v1; + + if (ver < 2) { + v1.len = entry->len; + v1.__pad = 0; + v1.pid = entry->pid; + v1.tid = entry->tid; + v1.sec = entry->sec; + v1.nsec = entry->nsec; + hdr = &v1; + hdr_len = sizeof(struct user_logger_entry_compat); + } else { + hdr = entry; + hdr_len = sizeof(struct logger_entry); } - return sizeof(struct logger_entry) + val; + return copy_to_user(buf, hdr, hdr_len); } /* @@ -118,15 +174,31 @@ static ssize_t do_read_log_to_user(struct logger_log *log, char __user *buf, size_t count) { + struct logger_entry scratch; + struct logger_entry *entry; size_t len; + size_t msg_start; /* - * We read from the log in two disjoint operations. 
First, we read from - * the current read head offset up to 'count' bytes or to the end of + * First, copy the header to userspace, using the version of + * the header requested + */ + entry = get_entry_header(log, reader->r_off, &scratch); + if (copy_header_to_user(reader->r_ver, entry, buf)) + return -EFAULT; + + count -= get_user_hdr_len(reader->r_ver); + buf += get_user_hdr_len(reader->r_ver); + msg_start = logger_offset(log, + reader->r_off + sizeof(struct logger_entry)); + + /* + * We read from the msg in two disjoint operations. First, we read from + * the current msg head offset up to 'count' bytes or to the end of * the log, whichever comes first. */ - len = min(count, log->size - reader->r_off); - if (copy_to_user(buf, log->buffer + reader->r_off, len)) + len = min(count, log->size - msg_start); + if (copy_to_user(buf, log->buffer + msg_start, len)) return -EFAULT; /* @@ -137,9 +209,34 @@ static ssize_t do_read_log_to_user(struct logger_log *log, if (copy_to_user(buf + len, log->buffer, count - len)) return -EFAULT; - reader->r_off = logger_offset(reader->r_off + count); + reader->r_off = logger_offset(log, reader->r_off + + sizeof(struct logger_entry) + count); - return count; + return count + get_user_hdr_len(reader->r_ver); +} + +/* + * get_next_entry_by_uid - Starting at 'off', returns an offset into + * 'log->buffer' which contains the first entry readable by 'euid' + */ +static size_t get_next_entry_by_uid(struct logger_log *log, + size_t off, uid_t euid) +{ + while (off != log->w_off) { + struct logger_entry *entry; + struct logger_entry scratch; + size_t next_len; + + entry = get_entry_header(log, off, &scratch); + + if (entry->euid == euid) + return off; + + next_len = sizeof(struct logger_entry) + entry->len; + off = logger_offset(log, off + next_len); + } + + return off; } /* @@ -147,11 +244,11 @@ static ssize_t do_read_log_to_user(struct logger_log *log, * * Behavior: * - * - O_NONBLOCK works - * - If there are no log entries to read, blocks until log is written to - * - Atomically reads exactly one log entry + * - O_NONBLOCK works + * - If there are no log entries to read, blocks until log is written to + * - Atomically reads exactly one log entry * - * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read + * Will set errno to EINVAL if read * buffer is insufficient to hold next entry. */ static ssize_t logger_read(struct file *file, char __user *buf, @@ -164,9 +261,10 @@ static ssize_t logger_read(struct file *file, char __user *buf, start: while (1) { + mutex_lock(&log->mutex); + prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&log->mutex); ret = (log->w_off == reader->r_off); mutex_unlock(&log->mutex); if (!ret) @@ -191,6 +289,10 @@ start: mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + /* is there still something to read or did we race? 
*/ if (unlikely(log->w_off == reader->r_off)) { mutex_unlock(&log->mutex); @@ -198,7 +300,8 @@ start: } /* get the size of the next entry */ - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); if (count < ret) { ret = -EINVAL; goto out; @@ -224,8 +327,9 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) size_t count = 0; do { - size_t nr = get_entry_len(log, off); - off = logger_offset(off + nr); + size_t nr = sizeof(struct logger_entry) + + get_entry_msg_len(log, off); + off = logger_offset(log, off + nr); count += nr; } while (count < len); @@ -233,16 +337,28 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) } /* - * clock_interval - is a < c < b in mod-space? Put another way, does the line - * from a to b cross c? + * is_between - is a < c < b, accounting for wrapping of a, b, and c + * positions in the buffer + * + * That is, if ab, check for c outside (not between) a and b + * + * |------- a xxxxxxxx b --------| + * c^ + * + * |xxxxx b --------- a xxxxxxxxx| + * c^ + * or c^ */ -static inline int clock_interval(size_t a, size_t b, size_t c) +static inline int is_between(size_t a, size_t b, size_t c) { - if (b < a) { - if (a < c || b >= c) + if (a < b) { + /* is c between a and b? */ + if (a < c && c <= b) return 1; } else { - if (a < c && b >= c) + /* is c outside of b through a? */ + if (c <= b || a < c) return 1; } @@ -260,14 +376,14 @@ static inline int clock_interval(size_t a, size_t b, size_t c) static void fix_up_readers(struct logger_log *log, size_t len) { size_t old = log->w_off; - size_t new = logger_offset(old + len); + size_t new = logger_offset(log, old + len); struct logger_reader *reader; - if (clock_interval(old, new, log->head)) + if (is_between(old, new, log->head)) log->head = get_next_entry(log, log->head, len); list_for_each_entry(reader, &log->readers, list) - if (clock_interval(old, new, reader->r_off)) + if (is_between(old, new, reader->r_off)) reader->r_off = get_next_entry(log, reader->r_off, len); } @@ -286,7 +402,7 @@ static void do_write_log(struct logger_log *log, const void *buf, size_t count) if (count != len) memcpy(log->buffer, buf + len, count - len); - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); } @@ -309,9 +425,15 @@ static ssize_t do_write_log_from_user(struct logger_log *log, if (count != len) if (copy_from_user(log->buffer, buf + len, count - len)) + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments. 
+ */ return -EFAULT; - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); return count; } @@ -336,7 +458,9 @@ ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.tid = current->pid; header.sec = now.tv_sec; header.nsec = now.tv_nsec; + header.euid = current_euid(); header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); + header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ if (unlikely(!header.len)) @@ -409,6 +533,10 @@ static int logger_open(struct inode *inode, struct file *file) return -ENOMEM; reader->log = log; + reader->r_ver = 1; + reader->r_all = in_egroup_p(inode->i_gid) || + capable(CAP_SYSLOG); + INIT_LIST_HEAD(&reader->list); mutex_lock(&log->mutex); @@ -433,9 +561,11 @@ static int logger_release(struct inode *ignored, struct file *file) if (file->f_mode & FMODE_READ) { struct logger_reader *reader = file->private_data; struct logger_log *log = reader->log; + mutex_lock(&log->mutex); list_del(&reader->list); mutex_unlock(&log->mutex); + kfree(reader); } @@ -466,6 +596,10 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) poll_wait(file, &log->wq, wait); mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) ret |= POLLIN | POLLRDNORM; mutex_unlock(&log->mutex); @@ -473,11 +607,25 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) return ret; } +static long logger_set_version(struct logger_reader *reader, void __user *arg) +{ + int version; + if (copy_from_user(&version, arg, sizeof(int))) + return -EFAULT; + + if ((version < 1) || (version > 2)) + return -EINVAL; + + reader->r_ver = version; + return 0; +} + static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct logger_log *log = file_get_log(file); struct logger_reader *reader; - long ret = -ENOTTY; + long ret = -EINVAL; + void __user *argp = (void __user *) arg; mutex_lock(&log->mutex); @@ -502,8 +650,14 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } reader = file->private_data; + + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); else ret = 0; break; @@ -517,6 +671,22 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) log->head = log->w_off; ret = 0; break; + case LOGGER_GET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = reader->r_ver; + break; + case LOGGER_SET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = logger_set_version(reader, argp); + break; } mutex_unlock(&log->mutex); @@ -537,8 +707,8 @@ static const struct file_operations logger_fops = { /* * Defines a log structure with name 'NAME' and a size of 'SIZE' bytes, which - * must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less than - * LONG_MAX minus LOGGER_ENTRY_MAX_LEN. + * must be a power of two, and greater than + * (LOGGER_ENTRY_MAX_PAYLOAD + sizeof(struct logger_entry)). 
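Usage note for the versioned ABI above: a reader stays on the v1 entry layout until it asks for v2 through LOGGER_SET_VERSION, after which read() returns the larger logger_entry header carrying hdr_size and euid. A hedged userspace sketch; the /dev/log/main path and the local logger.h copy are assumptions, only the ioctl usage comes from the driver code above:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "logger.h"		/* local copy providing LOGGER_SET_VERSION, struct logger_entry */

/* Open a log node and switch this reader to the v2 entry format. */
static int logger_open_v2(const char *node)
{
	int fd = open(node, O_RDONLY);
	int version = 2;

	if (fd < 0)
		return -1;
	if (ioctl(fd, LOGGER_SET_VERSION, &version) < 0) {
		close(fd);
		return -1;
	}
	return fd;		/* subsequent read()s return struct logger_entry headers */
}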
*/ #define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \ static unsigned char _buf_ ## VAR[SIZE]; \ @@ -558,10 +728,10 @@ static struct logger_log VAR = { \ .size = SIZE, \ }; -DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 64*1024) +DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 256*1024) DEFINE_LOGGER_DEVICE(log_events, LOGGER_LOG_EVENTS, 256*1024) -DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 64*1024) -DEFINE_LOGGER_DEVICE(log_system, LOGGER_LOG_SYSTEM, 64*1024) +DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 256*1024) +DEFINE_LOGGER_DEVICE(log_system, LOGGER_LOG_SYSTEM, 256*1024) static struct logger_log *get_log_from_minor(int minor) { diff --git a/drivers/staging/android/logger.h b/drivers/staging/android/logger.h old mode 100644 new mode 100755 index 2cb06e9d..3f612a3b --- a/drivers/staging/android/logger.h +++ b/drivers/staging/android/logger.h @@ -20,7 +20,12 @@ #include #include -struct logger_entry { +/* + * The userspace structure for version 1 of the logger_entry ABI. + * This structure is returned to userspace unless the caller requests + * an upgrade to a newer ABI version. + */ +struct user_logger_entry_compat { __u16 len; /* length of the payload */ __u16 __pad; /* no matter what, we get 2 bytes of padding */ __s32 pid; /* generating process's pid */ @@ -30,14 +35,28 @@ struct logger_entry { char msg[0]; /* the entry's payload */ }; +/* + * The structure for version 2 of the logger_entry ABI. + * This structure is returned to userspace if ioctl(LOGGER_SET_VERSION) + * is called with version >= 2 + */ +struct logger_entry { + __u16 len; /* length of the payload */ + __u16 hdr_size; /* sizeof(struct logger_entry_v2) */ + __s32 pid; /* generating process's pid */ + __s32 tid; /* generating process's tid */ + __s32 sec; /* seconds since Epoch */ + __s32 nsec; /* nanoseconds */ + uid_t euid; /* effective UID of logger */ + char msg[0]; /* the entry's payload */ +}; + #define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */ #define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */ #define LOGGER_LOG_SYSTEM "log_system" /* system/framework messages */ #define LOGGER_LOG_MAIN "log_main" /* everything else */ -#define LOGGER_ENTRY_MAX_LEN (4*1024) -#define LOGGER_ENTRY_MAX_PAYLOAD \ - (LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry)) +#define LOGGER_ENTRY_MAX_PAYLOAD 4076 #define __LOGGERIO 0xAE @@ -45,5 +64,7 @@ struct logger_entry { #define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */ #define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */ #define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */ +#define LOGGER_GET_VERSION _IO(__LOGGERIO, 5) /* abi version */ +#define LOGGER_SET_VERSION _IO(__LOGGERIO, 6) /* abi version */ #endif /* _LINUX_LOGGER_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h old mode 100644 new mode 100755 index c8f2a5f7..c4f6d94d --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -357,7 +357,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) From e04d028ecf0cbc29b16a6af5b85981ef93a1020c Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:03:44 +0800 Subject: [PATCH 088/155] reduced the log size --- drivers/staging/android/logger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index eb3d4ca5..05559eec 100755 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -728,10 +728,10 @@ static struct logger_log VAR = { \ .size = SIZE, \ }; -DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 256*1024) +DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 64*1024) DEFINE_LOGGER_DEVICE(log_events, LOGGER_LOG_EVENTS, 256*1024) -DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 256*1024) -DEFINE_LOGGER_DEVICE(log_system, LOGGER_LOG_SYSTEM, 256*1024) +DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 64*1024) +DEFINE_LOGGER_DEVICE(log_system, LOGGER_LOG_SYSTEM, 64*1024) static struct logger_log *get_log_from_minor(int minor) { From fced437cd2706f5cb8af6346df85478743dce64e Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:04:29 +0800 Subject: [PATCH 089/155] pmem: Check for valid virtual address while flushing pmem: Correctly account for aligned blocks --- drivers/misc/pmem.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/misc/pmem.c b/drivers/misc/pmem.c index f1523fd5..bc083e17 100755 --- a/drivers/misc/pmem.c +++ b/drivers/misc/pmem.c @@ -1,7 +1,7 @@ /* drivers/android/pmem.c * * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -1074,17 +1074,17 @@ static void bitmap_bits_set_all(uint32_t *bitp, int bit_start, int bit_end) static int bitmap_allocate_contiguous(uint32_t *bitp, int num_bits_to_alloc, - int total_bits, int spacing) + int total_bits, int spacing, int start_bit) { int bit_start, last_bit, word_index; if (num_bits_to_alloc <= 0) return -1; - for (bit_start = 0; ; - bit_start = (last_bit + + for (bit_start = start_bit; ; + bit_start = ((last_bit + (word_index << PMEM_32BIT_WORD_ORDER) + spacing - 1) - & ~(spacing - 1)) { + & ~(spacing - 1)) + start_bit) { int bit_end = bit_start + num_bits_to_alloc, total_words; if (bit_end > total_bits) @@ -1162,7 +1162,8 @@ static int reserve_quanta(const unsigned int quanta_needed, ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap, quanta_needed, (pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum, - spacing); + spacing, + start_bit); #if PMEM_DEBUG if (ret < 0) @@ -1915,6 +1916,13 @@ int pmem_cache_maint(struct file *file, unsigned int cmd, if (!file) return -EBADF; + /* + * check that the vaddr passed for flushing is valid + * so that you don't crash the kernel + */ + if (!pmem_addr->vaddr) + return -EINVAL; + data = file->private_data; id = get_id(file); From cdcb35c854ea4426832c0270bb60441684954258 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:06:24 +0800 Subject: [PATCH 090/155] pmem_adsp (user-space) as non-cached pmem_venc (user-space) as non-cached --- arch/arm/mach-msm/board-htcleo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c index 4697ef53..4913f043 100755 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -771,7 +771,7 @@ static struct android_pmem_platform_data android_pmem_adsp_pdata = { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; @@ -784,7 +784,7 @@ static struct android_pmem_platform_data android_pmem_venc_pdata 
= { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; static struct platform_device android_pmem_mdp_device = { From 0c4a37e304734e297d586e8a6dfdebcea74e68b3 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:07:29 +0800 Subject: [PATCH 091/155] change MSM_NAND_DMA_BUFFER_SIZE to SZ_1M --- drivers/mtd/devices/htcleo_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 9b23d680..12a9b390 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -51,7 +51,7 @@ unsigned crci_mask; #include "msm_nand.h" -#define MSM_NAND_DMA_BUFFER_SIZE SZ_4K +#define MSM_NAND_DMA_BUFFER_SIZE SZ_1M #define MSM_NAND_DMA_BUFFER_SLOTS \ (MSM_NAND_DMA_BUFFER_SIZE / (sizeof(((atomic_t *)0)->counter) * 8)) From 0fa4a5529ca5820b445ff48cb8862fe723ea21a7 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:52:07 +0800 Subject: [PATCH 092/155] Staging: android: binder: Don't call dump_stack in binder_vma_open Staging: android: binder: Fix crashes when sharing a binder file between processes drivers:staging:android Typos: fix some comments that have typos in them. fs: Remove missed ->fds_bits from cessation use of fd_set structs internally Staging:android: Change type for binder_debug_no_lock switch to bool Staging: android: binder: Fix use-after-free bug --- drivers/staging/android/binder.c | 270 ++++++++++++++----------------- 1 file changed, 120 insertions(+), 150 deletions(-) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index a0763da9..c44eb407 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -3,7 +3,6 @@ * Android IPC Subsystem * * Copyright (C) 2007-2008 Google, Inc. - * Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -31,14 +30,15 @@ #include #include #include -#include #include #include +#include #include "binder.h" static DEFINE_MUTEX(binder_lock); static DEFINE_MUTEX(binder_deferred_lock); +static DEFINE_MUTEX(binder_mmap_lock); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); @@ -98,12 +98,12 @@ enum { BINDER_DEBUG_BUFFER_ALLOC = 1U << 13, BINDER_DEBUG_PRIORITY_CAP = 1U << 14, BINDER_DEBUG_BUFFER_ALLOC_ASYNC = 1U << 15, - BINDER_DEBUG_TOP_ERRORS = 1U << 16, + BINDER_DEBUG_TOP_ERRORS = 1U << 16, }; static uint32_t binder_debug_mask; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static int binder_debug_no_lock; +static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -258,7 +258,7 @@ struct binder_ref { }; struct binder_buffer { - struct list_head entry; /* free and allocated entries by addesss */ + struct list_head entry; /* free and allocated entries by address */ struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */ unsigned free:1; @@ -288,6 +288,7 @@ struct binder_proc { struct rb_root refs_by_node; int pid; struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; @@ -380,8 +381,7 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); + fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -633,6 +633,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; + if (vma && mm != proc->vma_vm_mm) { + pr_err("binder: %d: vma mm and task mm mismatch\n", + proc->pid); + vma = NULL; + } } if (allocate == 0) @@ -640,8 +645,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (vma == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed to " - "map pages in userspace, no vma\n", proc->pid); + "binder: %d: binder_alloc_buf failed to " + "map pages in userspace, no vma\n", proc->pid); goto err_no_vma; } @@ -654,8 +659,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, *page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (*page == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "for page at %p\n", proc->pid, page_addr); + "binder: %d: binder_alloc_buf failed " + "for page at %p\n", proc->pid, page_addr); goto err_alloc_page_failed; } tmp_area.addr = page_addr; @@ -664,9 +669,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); if (ret) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "to map page at %p in kernel\n", - proc->pid, page_addr); + "binder: %d: binder_alloc_buf failed " + "to map page at %p in kernel\n", + proc->pid, page_addr); goto err_map_kernel_failed; } user_page_addr = @@ -674,9 +679,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, ret = vm_insert_page(vma, user_page_addr, page[0]); if (ret) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf failed " - "to map page at %lx in userspace\n", - proc->pid, user_page_addr); + "binder: %d: binder_alloc_buf failed " + "to map page at %lx in userspace\n", + proc->pid, user_page_addr); goto err_vm_insert_page_failed; } /* vm_insert_page does not seem to increment the refcount */ @@ -724,8 +729,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, if (proc->vma == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf, no vma\n", - proc->pid); + "binder: %d: binder_alloc_buf, no vma\n", + proc->pid); return NULL; } @@ -763,8 +768,8 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc, } if (best_fit == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d: binder_alloc_buf size %zd failed, " - "no address space\n", proc->pid, size); + "binder: %d: binder_alloc_buf size %zd failed, " + "no address space\n", proc->pid, size); return NULL; } if (n == NULL) { @@ -999,8 +1004,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, !(node == binder_context_mgr_node && node->has_strong_ref)) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: invalid inc strong " - "node for %d\n", node->debug_id); + "binder: invalid inc strong " + "node for %d\n", node->debug_id); return -EINVAL; } node->internal_strong_refs++; @@ -1016,8 +1021,8 @@ static int binder_inc_node(struct binder_node *node, int strong, int internal, if (!node->has_weak_ref && list_empty(&node->work.entry)) { if (target_list == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: invalid inc weak node " - "for %d\n", node->debug_id); + "binder: invalid inc weak node " + "for %d\n", node->debug_id); return -EINVAL; } 
list_add_tail(&node->work.entry, target_list); @@ -1053,7 +1058,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) if (node->proc) { rb_erase(&node->rb_node, &node->proc->nodes); binder_debug(BINDER_DEBUG_INTERNAL_REFS, - "binder: refless node %d deleted\n", + "binder: refless node %d deleted\n", node->debug_id); } else { hlist_del(&node->dead_node); @@ -1272,8 +1277,7 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "binder: send failed reply for " "transaction %d to %d:%d\n", - t->debug_id, - target_thread->proc->pid, + t->debug_id, target_thread->proc->pid, target_thread->pid); binder_pop_transaction(target_thread, t); @@ -1281,11 +1285,12 @@ static void binder_send_failed_reply(struct binder_transaction *t, wake_up_interruptible(&target_thread->wait); } else { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: reply failed, target " - "thread, %d:%d, has error code %d " - "already\n", target_thread->proc->pid, - target_thread->pid, - target_thread->return_error); + "binder: reply failed, target " + "thread, %d:%d, has error code %d " + "already\n", + target_thread->proc->pid, + target_thread->pid, + target_thread->return_error); } return; } else { @@ -1319,15 +1324,14 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, - "binder: %d buffer release %d, size %zd-%zd, failed at" - " %p\n", proc->pid, buffer->debug_id, + "binder: %d buffer release %d, size %zd-%zd, failed at %p\n", + proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, failed_at); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); - offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, - sizeof(void *))); + offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, sizeof(void *))); if (failed_at) off_end = failed_at; else @@ -1338,44 +1342,41 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, buffer->data_size < sizeof(*fp) || !IS_ALIGNED(*offp, sizeof(void *))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad" - "offset %zd, size %zd\n", debug_id, - *offp, buffer->data_size); + "binder: transaction release %d bad" + "offset %zd, size %zd\n", debug_id, + *offp, buffer->data_size); continue; } fp = (struct flat_binder_object *)(buffer->data + *offp); switch (fp->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { - struct binder_node *node = binder_get_node(proc, - fp->binder); + struct binder_node *node = binder_get_node(proc, fp->binder); if (node == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d" - " bad node %p\n", debug_id, fp->binder); + "binder: transaction release %d" + " bad node %p\n", debug_id, + fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%p\n", node->debug_id, node->ptr); - binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, - 0); + binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, - fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle); if (ref == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d" - " bad handle %ld\n", debug_id, - fp->handle); + "binder: transaction release %d" + " bad handle %ld\n", debug_id, + fp->handle); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc 
%d (node %d)\n", - ref->debug_id, ref->desc, - ref->node->debug_id); + ref->debug_id, ref->desc, ref->node->debug_id); binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE); } break; @@ -1388,8 +1389,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, default: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad " - "object type %lx\n", debug_id, fp->type); + "binder: transaction release %d bad " + "object type %lx\n", debug_id, fp->type); break; } } @@ -1614,19 +1615,15 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct binder_ref *ref; - struct binder_node *node = binder_get_node(proc, - fp->binder); + struct binder_node *node = binder_get_node(proc, fp->binder); if (node == NULL) { - node = binder_new_node(proc, fp->binder, - fp->cookie); + node = binder_new_node(proc, fp->binder, fp->cookie); if (node == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_new_node_failed; } - node->min_priority = fp->flags & - FLAT_BINDER_FLAG_PRIORITY_MASK; - node->accept_fds = !!(fp->flags & - FLAT_BINDER_FLAG_ACCEPTS_FDS); + node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK; + node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS); } if (fp->cookie != node->cookie) { binder_user_error("binder: %d:%d sending u%p " @@ -1656,8 +1653,7 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, - fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle); if (ref == NULL) { binder_user_error("binder: %d:%d got " "transaction with invalid " @@ -1673,31 +1669,24 @@ static void binder_transaction(struct binder_proc *proc, fp->type = BINDER_TYPE_WEAK_BINDER; fp->binder = ref->node->ptr; fp->cookie = ref->node->cookie; - binder_inc_node(ref->node, fp->type == - BINDER_TYPE_BINDER, 0, NULL); + binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> node %d u%p\n", - ref->debug_id, ref->desc, - ref->node->debug_id, - ref->node->ptr); + " ref %d desc %d -> node %d u%p\n", + ref->debug_id, ref->desc, ref->node->debug_id, + ref->node->ptr); } else { struct binder_ref *new_ref; - new_ref = binder_get_ref_for_node(target_proc, - ref->node); + new_ref = binder_get_ref_for_node(target_proc, ref->node); if (new_ref == NULL) { return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } fp->handle = new_ref->desc; - binder_inc_ref(new_ref, fp->type == - BINDER_TYPE_HANDLE, NULL); + binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); binder_debug(BINDER_DEBUG_TRANSACTION, - " ref %d desc %d -> ref %d" - " desc %d (node %d)\n", - ref->debug_id, ref->desc, - new_ref->debug_id, - new_ref->desc, - ref->node->debug_id); + " ref %d desc %d -> ref %d desc %d (node %d)\n", + ref->debug_id, ref->desc, new_ref->debug_id, + new_ref->desc, ref->node->debug_id); } } break; @@ -1707,19 +1696,13 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { if (!(in_reply_to->flags & TF_ACCEPT_FDS)) { - binder_user_error("binder: %d:%d got" - " reply with fd, %ld, but" - " target does not allow fds\n", - proc->pid, thread->pid, - fp->handle); + binder_user_error("binder: %d:%d got reply with fd, %ld, but target does not allow fds\n", + proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; } } else if 
(!target_node->accept_fds) { - binder_user_error( - "binder: %d:%d got transaction" - " with fd, %ld, but target does" - " not allow fds\n", + binder_user_error("binder: %d:%d got transaction with fd, %ld, but target does not allow fds\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fd_not_allowed; @@ -1727,15 +1710,12 @@ static void binder_transaction(struct binder_proc *proc, file = fget(fp->handle); if (file == NULL) { - binder_user_error( - "binder: %d:%d got transaction" - " with invalid fd, %ld\n", + binder_user_error("binder: %d:%d got transaction with invalid fd, %ld\n", proc->pid, thread->pid, fp->handle); return_error = BR_FAILED_REPLY; goto err_fget_failed; } - target_fd = task_get_unused_fd_flags(target_proc, - O_CLOEXEC); + target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC); if (target_fd < 0) { fput(file); return_error = BR_FAILED_REPLY; @@ -1743,8 +1723,7 @@ static void binder_transaction(struct binder_proc *proc, } task_fd_install(target_proc, target_fd, file); binder_debug(BINDER_DEBUG_TRANSACTION, - " fd %ld -> %d\n", fp->handle, - target_fd); + " fd %ld -> %d\n", fp->handle, target_fd); /* TODO: fput? */ fp->handle = target_fd; } break; @@ -1893,11 +1872,9 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_USER_REFS, - "binder: %d:%d %s ref %d desc %d s %d w %d" - " for node %d\n", proc->pid, thread->pid, - debug_string, ref->debug_id, ref->desc, - ref->strong, ref->weak, - ref->node->debug_id); + "binder: %d:%d %s ref %d desc %d s %d w %d for node %d\n", + proc->pid, thread->pid, debug_string, ref->debug_id, + ref->desc, ref->strong, ref->weak, ref->node->debug_id); break; } case BC_INCREFS_DONE: @@ -1958,19 +1935,17 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, binder_debug(BINDER_DEBUG_USER_REFS, "binder: %d:%d %s node %d ls %d lw %d\n", proc->pid, thread->pid, - cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" - : "BC_ACQUIRE_DONE", - node->debug_id, node->local_strong_refs, - node->local_weak_refs); + cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE", + node->debug_id, node->local_strong_refs, node->local_weak_refs); break; } case BC_ATTEMPT_ACQUIRE: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BC_ATTEMPT_ACQUIRE not supported\n"); + "binder: BC_ATTEMPT_ACQUIRE not supported\n"); return -EINVAL; case BC_ACQUIRE_RESULT: - binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BC_ACQUIRE_RESULT not supported\n"); + binder_debug(BINDER_DEBUG_TOP_ERRORS, + "binder: BC_ACQUIRE_RESULT not supported\n"); return -EINVAL; case BC_FREE_BUFFER: { @@ -1996,11 +1971,9 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, break; } binder_debug(BINDER_DEBUG_FREE_BUFFER, - "binder: %d:%d BC_FREE_BUFFER u%p found" - " buffer %d for %s transaction\n", - proc->pid, thread->pid, data_ptr, - buffer->debug_id, buffer->transaction ? - "active" : "finished"); + "binder: %d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n", + proc->pid, thread->pid, data_ptr, buffer->debug_id, + buffer->transaction ? 
"active" : "finished"); if (buffer->transaction) { buffer->transaction->buffer = NULL; @@ -2097,15 +2070,13 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, } binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, - "binder: %d:%d %s %p ref %d desc %d s %d" - " w %d for node %d\n", + "binder: %d:%d %s %p ref %d desc %d s %d w %d for node %d\n", proc->pid, thread->pid, cmd == BC_REQUEST_DEATH_NOTIFICATION ? "BC_REQUEST_DEATH_NOTIFICATION" : "BC_CLEAR_DEATH_NOTIFICATION", cookie, ref->debug_id, ref->desc, - ref->strong, ref->weak, - ref->node->debug_id); + ref->strong, ref->weak, ref->node->debug_id); if (cmd == BC_REQUEST_DEATH_NOTIFICATION) { if (ref->death) { @@ -2119,12 +2090,10 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, death = kzalloc(sizeof(*death), GFP_KERNEL); if (death == NULL) { thread->return_error = BR_ERROR; - binder_debug( - BINDER_DEBUG_FAILED_TRANSACTION, - "binder: %d:%d " - "BC_REQUEST_DEATH_NOTIFICATION" - " failed\n", - proc->pid, thread->pid); + binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, + "binder: %d:%d " + "BC_REQUEST_DEATH_NOTIFICATION failed\n", + proc->pid, thread->pid); break; } binder_stats_created(BINDER_STAT_DEATH); @@ -2214,8 +2183,8 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, default: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d unknown command %d\n", - proc->pid, thread->pid, cmd); + "binder: %d:%d unknown command %d\n", + proc->pid, thread->pid, cmd); return -EINVAL; } *consumed = ptr - buffer; @@ -2684,11 +2653,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_ioctl: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg);*/ + /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ - ret = wait_event_interruptible(binder_user_error_wait, - binder_stop_on_user_error < 2); + ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret) return ret; @@ -2745,8 +2712,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; } case BINDER_SET_MAX_THREADS: - if (copy_from_user(&proc->max_threads, ubuf, - sizeof(proc->max_threads))) { + if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) { ret = -EINVAL; goto err; } @@ -2754,17 +2720,17 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case BINDER_SET_CONTEXT_MGR: if (binder_context_mgr_node != NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BINDER_SET_CONTEXT_MGR already set\n"); + "binder: BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto err; } if (binder_context_mgr_uid != -1) { if (binder_context_mgr_uid != current->cred->euid) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: BINDER_SET_" - "CONTEXT_MGR bad uid %d != %d\n", - current->cred->euid, - binder_context_mgr_uid); + "binder: BINDER_SET_" + "CONTEXT_MGR bad uid %d != %d\n", + current->cred->euid, + binder_context_mgr_uid); ret = -EPERM; goto err; } @@ -2808,8 +2774,8 @@ err: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d ioctl %x %lx returned %d\n", - proc->pid, current->pid, cmd, arg, ret); + "binder: %d:%d ioctl %x %lx returned %d\n", + proc->pid, current->pid, cmd, arg, ret); return 
ret; } @@ -2821,7 +2787,6 @@ static void binder_vma_open(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - dump_stack(); } static void binder_vma_close(struct vm_area_struct *vma) @@ -2833,6 +2798,7 @@ static void binder_vma_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); proc->vma = NULL; + proc->vma_vm_mm = NULL; binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -2865,6 +2831,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + mutex_lock(&binder_mmap_lock); if (proc->buffer) { ret = -EBUSY; failure_string = "already mapped"; @@ -2879,13 +2846,13 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } proc->buffer = area->addr; proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; + mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx maps %p bad alignment\n", - proc->pid, vma->vm_start, vma->vm_end, proc->buffer); + "binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer); vma->vm_start += PAGE_SIZE; } } @@ -2913,11 +2880,11 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) binder_insert_free_buffer(proc, buffer); proc->free_async_space = proc->buffer_size / 2; barrier(); - proc->files = get_files_struct(current); + proc->files = get_files_struct(proc->tsk); proc->vma = vma; + proc->vma_vm_mm = vma->vm_mm; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx maps %p\n", + /*printk(KERN_INFO "binder_mmap: %d %lx-%lx maps %p\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/ return 0; @@ -2925,14 +2892,17 @@ err_alloc_small_buf_failed: kfree(proc->pages); proc->pages = NULL; err_alloc_pages_failed: + mutex_lock(&binder_mmap_lock); vfree(proc->buffer); proc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: + mutex_unlock(&binder_mmap_lock); err_bad_arg: binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder_mmap: %d %lx-%lx %s failed %d\n", - proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + "binder_mmap: %d %lx-%lx %s failed %d\n", + proc->pid, vma->vm_start, vma->vm_end, failure_string, + ret); return ret; } @@ -3087,9 +3057,9 @@ static void binder_deferred_release(struct binder_proc *proc) t->buffer = NULL; buffer->transaction = NULL; binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: release proc %d, " - "transaction %d, not freed\n", - proc->pid, t->debug_id); + "binder: release proc %d, " + "transaction %d, not freed\n", + proc->pid, t->debug_id); /*BUG();*/ } binder_free_buf(proc, buffer); From 9f705870677ca748c3baccfc411667cdd4ba1137 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:53:09 +0800 Subject: [PATCH 093/155] staging: android: lowmemorykiller: sysfs node and notifications android, lowmemorykiller: remove task handoff notifier staging: android: lowmemorykiller: Fix task_struct leak staging: android/lowmemorykiller: Don't unregister notifier from atomic context staging: android, lowmemorykiller: convert to use oom_score_adj staging: android/lowmemorykiller: Do not kill kernel threads staging: android/lowmemorykiller: No need for task->signal check 
staging: android/lowmemorykiller: Better mm handling staging: android/lowmemorykiller: Don't grab tasklist_lock staging: android: lowmemorykiller: Don't wait more than one second for a process to die Staging: android: fixed 80 characters warnings in lowmemorykiller.c staging: android: lowmemorykiller: Ignore shmem pages in page-cache staging: android: lowmemorykiller: Remove bitrotted codepath staging: android: lowmemkiller: Substantially reduce overhead during reclaim staging: android: lowmemorykiller: Don't try to kill the same pid over and over --- drivers/staging/android/lowmemorykiller.c | 201 ++++++++++++++++++---- 1 file changed, 164 insertions(+), 37 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 42cd93ea..05ebece0 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -29,12 +29,17 @@ * */ -#include #include +#include +#include +#include #include +#include +#include #include #include -#include +#include +#include static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -52,8 +57,12 @@ static size_t lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; +static size_t lowmem_minfree_notif_trigger; + +static unsigned int offlining; static struct task_struct *lowmem_deathpending; -static DEFINE_SPINLOCK(lowmem_deathpending_lock); +static unsigned long lowmem_deathpending_timeout; +static struct kobject *lowmem_kobj; #define lowmem_print(level, x...) \ do { \ @@ -68,29 +77,66 @@ static struct notifier_block task_nb = { .notifier_call = task_notify_func, }; - -static void task_free_fn(struct work_struct *work) -{ - unsigned long flags; - - task_free_unregister(&task_nb); - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - lowmem_deathpending = NULL; - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); -} -static DECLARE_WORK(task_free_work, task_free_fn); - static int task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; - if (task == lowmem_deathpending) { - schedule_work(&task_free_work); - } + if (task == lowmem_deathpending) + lowmem_deathpending = NULL; + return NOTIFY_OK; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int lmk_hotplug_callback(struct notifier_block *self, + unsigned long cmd, void *data) +{ + switch (cmd) { + /* Don't care LMK cases */ + case MEM_ONLINE: + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + case MEM_GOING_ONLINE: + offlining = 0; + lowmem_print(4, "lmk in normal mode\n"); + break; + /* LMK should account for movable zone */ + case MEM_GOING_OFFLINE: + offlining = 1; + lowmem_print(4, "lmk in hotplug mode\n"); + break; + } + return NOTIFY_DONE; +} +#endif + + + +static void lowmem_notify_killzone_approach(void); + +static inline void get_free_ram(int *other_free, int *other_file) +{ + struct zone *zone; + *other_free = global_page_state(NR_FREE_PAGES); + *other_file = global_page_state(NR_FILE_PAGES) - + global_page_state(NR_SHMEM); + + if (offlining) { + /* Discount all free space in the section being offlined */ + for_each_zone(zone) { + if (zone_idx(zone) == ZONE_MOVABLE) { + *other_free -= zone_page_state(zone, + NR_FREE_PAGES); + lowmem_print(4, "lowmem_shrink discounted " + "%lu pages in movable zone\n", + zone_page_state(zone, NR_FREE_PAGES)); + } + } + } +} + static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) { struct task_struct *p; @@ -102,10 +148,8 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) 
int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); - int other_free = global_page_state(NR_FREE_PAGES); - int other_file = global_page_state(NR_FILE_PAGES); - unsigned long flags; - + int other_free; + int other_file; /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan @@ -113,15 +157,24 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) * this pass. * */ - if (lowmem_deathpending) + if (lowmem_deathpending && + time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; + get_free_ram(&other_free, &other_file); + + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) { + lowmem_notify_killzone_approach(); + } + if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_file < lowmem_minfree[i]) { + if (other_free < lowmem_minfree[i] && + other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } @@ -176,20 +229,14 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } - if (selected) { - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - if (!lowmem_deathpending) { - lowmem_print(1, - "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); - lowmem_deathpending = selected; - task_free_register(&task_nb); - force_sig(SIGKILL, selected); - rem -= selected_tasksize; - } - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); + lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_adj, selected_tasksize); + lowmem_deathpending = selected; + lowmem_deathpending_timeout = jiffies + HZ; + force_sig(SIGKILL, selected); + rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); @@ -202,15 +249,93 @@ static struct shrinker lowmem_shrinker = { .seeks = DEFAULT_SEEKS * 16 }; +static void lowmem_notify_killzone_approach(void) +{ + lowmem_print(3, "notification trigger activated\n"); + sysfs_notify(lowmem_kobj, NULL, "notify_trigger_active"); +} + +static ssize_t lowmem_notify_trigger_active_show(struct kobject *k, + struct kobj_attribute *attr, char *buf) +{ + int other_free, other_file; + get_free_ram(&other_free, &other_file); + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) + return snprintf(buf, 3, "1\n"); + else + return snprintf(buf, 3, "0\n"); +} + +static struct kobj_attribute lowmem_notify_trigger_active_attr = + __ATTR(notify_trigger_active, S_IRUGO, + lowmem_notify_trigger_active_show, NULL); + +static struct attribute *lowmem_default_attrs[] = { + &lowmem_notify_trigger_active_attr.attr, + NULL, +}; + +static ssize_t lowmem_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(k, kobj_attr, buf); +} + +static const struct sysfs_ops lowmem_ops = { + .show = lowmem_show, +}; + +static void lowmem_kobj_release(struct kobject *kobj) +{ + /* Nothing to be done here */ +} + +static struct kobj_type lowmem_kobj_type = { + .release = lowmem_kobj_release, + .sysfs_ops = &lowmem_ops, + .default_attrs = lowmem_default_attrs, +}; + static int __init lowmem_init(void) { + int rc; + 
task_free_register(&task_nb); register_shrinker(&lowmem_shrinker); +#ifdef CONFIG_MEMORY_HOTPLUG + hotplug_memory_notifier(lmk_hotplug_callback, 0); +#endif + + lowmem_kobj = kzalloc(sizeof(*lowmem_kobj), GFP_KERNEL); + if (!lowmem_kobj) { + rc = -ENOMEM; + goto err; + } + + rc = kobject_init_and_add(lowmem_kobj, &lowmem_kobj_type, + mm_kobj, "lowmemkiller"); + if (rc) + goto err_kobj; + return 0; + +err_kobj: + kfree(lowmem_kobj); + +err: + unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); + + return rc; } static void __exit lowmem_exit(void) { + kobject_put(lowmem_kobj); + kfree(lowmem_kobj); unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); } module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); @@ -219,6 +344,8 @@ module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size, module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); +module_param_named(notify_trigger, lowmem_minfree_notif_trigger, uint, + S_IRUGO | S_IWUSR); module_init(lowmem_init); module_exit(lowmem_exit); From a8ff4f89b4c1dbea97072bf32dc8278cf7af35fb Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:53:52 +0800 Subject: [PATCH 094/155] android: logger: Allow a UID to read it's own log entries staging: logger: hold mutex while removing reader staging: android: logger: clarify non-update of w_off in do_write_log_from_user staging: android: logger: clarify code in clock_interval staging: android: logger: reorder prepare_to_wait and mutex_lock staging: android: logger: simplify and optimize get_entry_len staging: android: logger: Change logger_offset() from macro to function Staging: android: fixed white spaces coding style issue in logger.c android: logger: bump up the logger buffer sizes --- drivers/staging/android/logger.c | 262 +++++++++++++++++++++++++------ drivers/staging/android/logger.h | 29 +++- include/linux/capability.h | 6 +- 3 files changed, 246 insertions(+), 51 deletions(-) diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 1a0c1391..05559eec 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -37,7 +37,7 @@ * mutex 'mutex'. */ struct logger_log { - unsigned char *buffer;/* the ring buffer itself */ + unsigned char *buffer;/* the ring buffer itself */ struct miscdevice misc; /* misc device representing the log */ wait_queue_head_t wq; /* wait queue for readers */ struct list_head readers; /* this log's readers */ @@ -57,19 +57,25 @@ struct logger_reader { struct logger_log *log; /* associated log */ struct list_head list; /* entry in logger_log's list */ size_t r_off; /* current read head offset */ + bool r_all; /* reader can read all entries */ + int r_ver; /* reader ABI version */ }; /* logger_offset - returns index 'n' into the log via (optimized) modulus */ -#define logger_offset(n) ((n) & (log->size - 1)) +size_t logger_offset(struct logger_log *log, size_t n) +{ + return n & (log->size-1); +} + /* * file_get_log - Given a file structure, return the associated log * * This isn't aesthetic. 
We have several goals: * - * 1) Need to quickly obtain the associated log during an I/O operation - * 2) Readers need to maintain state (logger_reader) - * 3) Writers need to be very fast (open() should be a near no-op) + * 1) Need to quickly obtain the associated log during an I/O operation + * 2) Readers need to maintain state (logger_reader) + * 3) Writers need to be very fast (open() should be a near no-op) * * In the reader case, we can trivially go file->logger_reader->logger_log. * For a writer, we don't want to maintain a logger_reader, so we just go @@ -86,25 +92,75 @@ static inline struct logger_log *file_get_log(struct file *file) } /* - * get_entry_len - Grabs the length of the payload of the next entry starting - * from 'off'. + * get_entry_header - returns a pointer to the logger_entry header within + * 'log' starting at offset 'off'. A temporary logger_entry 'scratch' must + * be provided. Typically the return value will be a pointer within + * 'logger->buf'. However, a pointer to 'scratch' may be returned if + * the log entry spans the end and beginning of the circular buffer. + */ +static struct logger_entry *get_entry_header(struct logger_log *log, + size_t off, struct logger_entry *scratch) +{ + size_t len = min(sizeof(struct logger_entry), log->size - off); + if (len != sizeof(struct logger_entry)) { + memcpy(((void *) scratch), log->buffer + off, len); + memcpy(((void *) scratch) + len, log->buffer, + sizeof(struct logger_entry) - len); + return scratch; + } + + return (struct logger_entry *) (log->buffer + off); +} + +/* + * get_entry_msg_len - Grabs the length of the message of the entry + * starting from from 'off'. + * + * An entry length is 2 bytes (16 bits) in host endian order. + * In the log, the length does not include the size of the log entry structure. + * This function returns the size including the log entry structure. * * Caller needs to hold log->mutex. */ -static __u32 get_entry_len(struct logger_log *log, size_t off) +static __u32 get_entry_msg_len(struct logger_log *log, size_t off) { - __u16 val; + struct logger_entry scratch; + struct logger_entry *entry; - switch (log->size - off) { - case 1: - memcpy(&val, log->buffer + off, 1); - memcpy(((char *) &val) + 1, log->buffer, 1); - break; - default: - memcpy(&val, log->buffer + off, 2); + entry = get_entry_header(log, off, &scratch); + return entry->len; +} + +static size_t get_user_hdr_len(int ver) +{ + if (ver < 2) + return sizeof(struct user_logger_entry_compat); + else + return sizeof(struct logger_entry); +} + +static ssize_t copy_header_to_user(int ver, struct logger_entry *entry, + char __user *buf) +{ + void *hdr; + size_t hdr_len; + struct user_logger_entry_compat v1; + + if (ver < 2) { + v1.len = entry->len; + v1.__pad = 0; + v1.pid = entry->pid; + v1.tid = entry->tid; + v1.sec = entry->sec; + v1.nsec = entry->nsec; + hdr = &v1; + hdr_len = sizeof(struct user_logger_entry_compat); + } else { + hdr = entry; + hdr_len = sizeof(struct logger_entry); } - return sizeof(struct logger_entry) + val; + return copy_to_user(buf, hdr, hdr_len); } /* @@ -118,15 +174,31 @@ static ssize_t do_read_log_to_user(struct logger_log *log, char __user *buf, size_t count) { + struct logger_entry scratch; + struct logger_entry *entry; size_t len; + size_t msg_start; /* - * We read from the log in two disjoint operations. 
First, we read from - * the current read head offset up to 'count' bytes or to the end of + * First, copy the header to userspace, using the version of + * the header requested + */ + entry = get_entry_header(log, reader->r_off, &scratch); + if (copy_header_to_user(reader->r_ver, entry, buf)) + return -EFAULT; + + count -= get_user_hdr_len(reader->r_ver); + buf += get_user_hdr_len(reader->r_ver); + msg_start = logger_offset(log, + reader->r_off + sizeof(struct logger_entry)); + + /* + * We read from the msg in two disjoint operations. First, we read from + * the current msg head offset up to 'count' bytes or to the end of * the log, whichever comes first. */ - len = min(count, log->size - reader->r_off); - if (copy_to_user(buf, log->buffer + reader->r_off, len)) + len = min(count, log->size - msg_start); + if (copy_to_user(buf, log->buffer + msg_start, len)) return -EFAULT; /* @@ -137,9 +209,34 @@ static ssize_t do_read_log_to_user(struct logger_log *log, if (copy_to_user(buf + len, log->buffer, count - len)) return -EFAULT; - reader->r_off = logger_offset(reader->r_off + count); + reader->r_off = logger_offset(log, reader->r_off + + sizeof(struct logger_entry) + count); - return count; + return count + get_user_hdr_len(reader->r_ver); +} + +/* + * get_next_entry_by_uid - Starting at 'off', returns an offset into + * 'log->buffer' which contains the first entry readable by 'euid' + */ +static size_t get_next_entry_by_uid(struct logger_log *log, + size_t off, uid_t euid) +{ + while (off != log->w_off) { + struct logger_entry *entry; + struct logger_entry scratch; + size_t next_len; + + entry = get_entry_header(log, off, &scratch); + + if (entry->euid == euid) + return off; + + next_len = sizeof(struct logger_entry) + entry->len; + off = logger_offset(log, off + next_len); + } + + return off; } /* @@ -147,11 +244,11 @@ static ssize_t do_read_log_to_user(struct logger_log *log, * * Behavior: * - * - O_NONBLOCK works - * - If there are no log entries to read, blocks until log is written to - * - Atomically reads exactly one log entry + * - O_NONBLOCK works + * - If there are no log entries to read, blocks until log is written to + * - Atomically reads exactly one log entry * - * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read + * Will set errno to EINVAL if read * buffer is insufficient to hold next entry. */ static ssize_t logger_read(struct file *file, char __user *buf, @@ -164,9 +261,10 @@ static ssize_t logger_read(struct file *file, char __user *buf, start: while (1) { + mutex_lock(&log->mutex); + prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&log->mutex); ret = (log->w_off == reader->r_off); mutex_unlock(&log->mutex); if (!ret) @@ -191,6 +289,10 @@ start: mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + /* is there still something to read or did we race? 
*/ if (unlikely(log->w_off == reader->r_off)) { mutex_unlock(&log->mutex); @@ -198,7 +300,8 @@ start: } /* get the size of the next entry */ - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); if (count < ret) { ret = -EINVAL; goto out; @@ -224,8 +327,9 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) size_t count = 0; do { - size_t nr = get_entry_len(log, off); - off = logger_offset(off + nr); + size_t nr = sizeof(struct logger_entry) + + get_entry_msg_len(log, off); + off = logger_offset(log, off + nr); count += nr; } while (count < len); @@ -233,16 +337,28 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) } /* - * clock_interval - is a < c < b in mod-space? Put another way, does the line - * from a to b cross c? + * is_between - is a < c < b, accounting for wrapping of a, b, and c + * positions in the buffer + * + * That is, if ab, check for c outside (not between) a and b + * + * |------- a xxxxxxxx b --------| + * c^ + * + * |xxxxx b --------- a xxxxxxxxx| + * c^ + * or c^ */ -static inline int clock_interval(size_t a, size_t b, size_t c) +static inline int is_between(size_t a, size_t b, size_t c) { - if (b < a) { - if (a < c || b >= c) + if (a < b) { + /* is c between a and b? */ + if (a < c && c <= b) return 1; } else { - if (a < c && b >= c) + /* is c outside of b through a? */ + if (c <= b || a < c) return 1; } @@ -260,14 +376,14 @@ static inline int clock_interval(size_t a, size_t b, size_t c) static void fix_up_readers(struct logger_log *log, size_t len) { size_t old = log->w_off; - size_t new = logger_offset(old + len); + size_t new = logger_offset(log, old + len); struct logger_reader *reader; - if (clock_interval(old, new, log->head)) + if (is_between(old, new, log->head)) log->head = get_next_entry(log, log->head, len); list_for_each_entry(reader, &log->readers, list) - if (clock_interval(old, new, reader->r_off)) + if (is_between(old, new, reader->r_off)) reader->r_off = get_next_entry(log, reader->r_off, len); } @@ -286,7 +402,7 @@ static void do_write_log(struct logger_log *log, const void *buf, size_t count) if (count != len) memcpy(log->buffer, buf + len, count - len); - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); } @@ -309,9 +425,15 @@ static ssize_t do_write_log_from_user(struct logger_log *log, if (count != len) if (copy_from_user(log->buffer, buf + len, count - len)) + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments. 
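The is_between() helper introduced in the hunks above replaces clock_interval() and decides whether advancing the write head from offset a to offset b (possibly wrapping past the end of the ring buffer) crosses a reader position c; the comment's "if ab" case is presumably the wrapped a > b case. A stand-alone sketch of both cases — the helper is copied verbatim from the hunk, and the test offsets are chosen only for illustration:

#include <assert.h>
#include <stddef.h>

/* Copied from the hunk above: does moving the write head from 'a' to 'b'
 * (wrapping when b <= a) pass position 'c'? */
static inline int is_between(size_t a, size_t b, size_t c)
{
	if (a < b) {
		/* is c between a and b? */
		if (a < c && c <= b)
			return 1;
	} else {
		/* is c outside of b through a? */
		if (c <= b || a < c)
			return 1;
	}
	return 0;
}

int main(void)
{
	/* no wrap: |---- a xxxx b ----| */
	assert(is_between(10, 20, 15));
	assert(!is_between(10, 20, 25));

	/* wrapped: |xxxx b ---- a xxxx| */
	assert(is_between(20, 10, 5));
	assert(is_between(20, 10, 25));
	assert(!is_between(20, 10, 15));
	return 0;
}

This is why fix_up_readers() only has to bump log->head or a reader's r_off when the new write span actually crosses it, in both the wrapped and non-wrapped layouts.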
+ */ return -EFAULT; - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); return count; } @@ -336,7 +458,9 @@ ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.tid = current->pid; header.sec = now.tv_sec; header.nsec = now.tv_nsec; + header.euid = current_euid(); header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); + header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ if (unlikely(!header.len)) @@ -409,6 +533,10 @@ static int logger_open(struct inode *inode, struct file *file) return -ENOMEM; reader->log = log; + reader->r_ver = 1; + reader->r_all = in_egroup_p(inode->i_gid) || + capable(CAP_SYSLOG); + INIT_LIST_HEAD(&reader->list); mutex_lock(&log->mutex); @@ -433,9 +561,11 @@ static int logger_release(struct inode *ignored, struct file *file) if (file->f_mode & FMODE_READ) { struct logger_reader *reader = file->private_data; struct logger_log *log = reader->log; + mutex_lock(&log->mutex); list_del(&reader->list); mutex_unlock(&log->mutex); + kfree(reader); } @@ -466,6 +596,10 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) poll_wait(file, &log->wq, wait); mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) ret |= POLLIN | POLLRDNORM; mutex_unlock(&log->mutex); @@ -473,11 +607,25 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) return ret; } +static long logger_set_version(struct logger_reader *reader, void __user *arg) +{ + int version; + if (copy_from_user(&version, arg, sizeof(int))) + return -EFAULT; + + if ((version < 1) || (version > 2)) + return -EINVAL; + + reader->r_ver = version; + return 0; +} + static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct logger_log *log = file_get_log(file); struct logger_reader *reader; - long ret = -ENOTTY; + long ret = -EINVAL; + void __user *argp = (void __user *) arg; mutex_lock(&log->mutex); @@ -502,8 +650,14 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } reader = file->private_data; + + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); else ret = 0; break; @@ -517,6 +671,22 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) log->head = log->w_off; ret = 0; break; + case LOGGER_GET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = reader->r_ver; + break; + case LOGGER_SET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = logger_set_version(reader, argp); + break; } mutex_unlock(&log->mutex); @@ -537,8 +707,8 @@ static const struct file_operations logger_fops = { /* * Defines a log structure with name 'NAME' and a size of 'SIZE' bytes, which - * must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less than - * LONG_MAX minus LOGGER_ENTRY_MAX_LEN. + * must be a power of two, and greater than + * (LOGGER_ENTRY_MAX_PAYLOAD + sizeof(struct logger_entry)). 
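As the comment above notes, each log buffer size must be a power of two; that is what lets logger_offset() in this patch mask with (size - 1) instead of taking a modulus. A minimal stand-alone check of that equivalence — the 256*1024 size is just one of the sizes passed to DEFINE_LOGGER_DEVICE and is used here purely for illustration:

#include <assert.h>
#include <stddef.h>

/* For a power-of-two size, (n & (size - 1)) equals (n % size), so the
 * ring-buffer index can be computed without a division. */
static size_t offset_mask(size_t n, size_t size)
{
	return n & (size - 1);
}

int main(void)
{
	const size_t size = 256 * 1024;	/* power of two, as required above */
	size_t n;

	for (n = 0; n < 4 * size; n += 4099)
		assert(offset_mask(n, size) == n % size);
	return 0;
}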
*/ #define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \ static unsigned char _buf_ ## VAR[SIZE]; \ diff --git a/drivers/staging/android/logger.h b/drivers/staging/android/logger.h index 2cb06e9d..3f612a3b 100644 --- a/drivers/staging/android/logger.h +++ b/drivers/staging/android/logger.h @@ -20,7 +20,12 @@ #include #include -struct logger_entry { +/* + * The userspace structure for version 1 of the logger_entry ABI. + * This structure is returned to userspace unless the caller requests + * an upgrade to a newer ABI version. + */ +struct user_logger_entry_compat { __u16 len; /* length of the payload */ __u16 __pad; /* no matter what, we get 2 bytes of padding */ __s32 pid; /* generating process's pid */ @@ -30,14 +35,28 @@ struct logger_entry { char msg[0]; /* the entry's payload */ }; +/* + * The structure for version 2 of the logger_entry ABI. + * This structure is returned to userspace if ioctl(LOGGER_SET_VERSION) + * is called with version >= 2 + */ +struct logger_entry { + __u16 len; /* length of the payload */ + __u16 hdr_size; /* sizeof(struct logger_entry_v2) */ + __s32 pid; /* generating process's pid */ + __s32 tid; /* generating process's tid */ + __s32 sec; /* seconds since Epoch */ + __s32 nsec; /* nanoseconds */ + uid_t euid; /* effective UID of logger */ + char msg[0]; /* the entry's payload */ +}; + #define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */ #define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */ #define LOGGER_LOG_SYSTEM "log_system" /* system/framework messages */ #define LOGGER_LOG_MAIN "log_main" /* everything else */ -#define LOGGER_ENTRY_MAX_LEN (4*1024) -#define LOGGER_ENTRY_MAX_PAYLOAD \ - (LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry)) +#define LOGGER_ENTRY_MAX_PAYLOAD 4076 #define __LOGGERIO 0xAE @@ -45,5 +64,7 @@ struct logger_entry { #define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */ #define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */ #define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */ +#define LOGGER_GET_VERSION _IO(__LOGGERIO, 5) /* abi version */ +#define LOGGER_SET_VERSION _IO(__LOGGERIO, 6) /* abi version */ #endif /* _LINUX_LOGGER_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f7..c4f6d94d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -357,7 +357,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) From 8d4f825606dfdbf3c3c756824841b7eb1328a0a4 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:54:43 +0800 Subject: [PATCH 095/155] pmem: Check for valid virtual address while flushing pmem: Correctly account for aligned blocks --- drivers/misc/pmem.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/misc/pmem.c b/drivers/misc/pmem.c index f1523fd5..bc083e17 100755 --- a/drivers/misc/pmem.c +++ b/drivers/misc/pmem.c @@ -1,7 +1,7 @@ /* drivers/android/pmem.c * * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -1074,17 +1074,17 @@ static void bitmap_bits_set_all(uint32_t *bitp, int bit_start, int bit_end) static int bitmap_allocate_contiguous(uint32_t *bitp, int num_bits_to_alloc, - int total_bits, int spacing) + int total_bits, int spacing, int start_bit) { int bit_start, last_bit, word_index; if (num_bits_to_alloc <= 0) return -1; - for (bit_start = 0; ; - bit_start = (last_bit + + for (bit_start = start_bit; ; + bit_start = ((last_bit + (word_index << PMEM_32BIT_WORD_ORDER) + spacing - 1) - & ~(spacing - 1)) { + & ~(spacing - 1)) + start_bit) { int bit_end = bit_start + num_bits_to_alloc, total_words; if (bit_end > total_bits) @@ -1162,7 +1162,8 @@ static int reserve_quanta(const unsigned int quanta_needed, ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap, quanta_needed, (pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum, - spacing); + spacing, + start_bit); #if PMEM_DEBUG if (ret < 0) @@ -1915,6 +1916,13 @@ int pmem_cache_maint(struct file *file, unsigned int cmd, if (!file) return -EBADF; + /* + * check that the vaddr passed for flushing is valid + * so that you don't crash the kernel + */ + if (!pmem_addr->vaddr) + return -EINVAL; + data = file->private_data; id = get_id(file); From e4c201d97a7070f55fc9d0c2f02b5f767c0a685a Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:55:05 +0800 Subject: [PATCH 096/155] pmem_adsp (user-space) as non-cached pmem_venc (user-space) as non-cached --- arch/arm/mach-msm/board-htcleo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c index 4697ef53..4913f043 100755 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -771,7 +771,7 @@ static struct android_pmem_platform_data android_pmem_adsp_pdata = { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; @@ -784,7 +784,7 @@ static struct android_pmem_platform_data android_pmem_venc_pdata = { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; static struct platform_device android_pmem_mdp_device = { From c59e5f029ca79573404a5a01bc2107d3c4d9f729 Mon Sep 17 00:00:00 2001 From: securecrt Date: Wed, 15 Aug 2012 18:55:32 +0800 Subject: [PATCH 097/155] change MSM_NAND_DMA_BUFFER_SIZE to SZ_1M --- drivers/mtd/devices/htcleo_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 9b23d680..12a9b390 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -51,7 +51,7 @@ unsigned crci_mask; #include "msm_nand.h" -#define MSM_NAND_DMA_BUFFER_SIZE SZ_4K +#define MSM_NAND_DMA_BUFFER_SIZE SZ_1M #define MSM_NAND_DMA_BUFFER_SLOTS \ (MSM_NAND_DMA_BUFFER_SIZE / (sizeof(((atomic_t *)0)->counter) * 8)) From cc0db50c3303916e1d867327aa37c688a69c2bd4 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 17 Aug 2012 01:19:24 +0800 Subject: [PATCH 098/155] ksm: remove unswappable max_kernel_pages ksm: fix bad user data when swapping thp: ksm: free swap when swapcache page is replaced --- mm/ksm.c | 56 ++++++++++++++++++-------------------------------------- 1 file changed, 18 insertions(+), 38 deletions(-) mode change 100644 => 100755 mm/ksm.c diff --git a/mm/ksm.c b/mm/ksm.c old mode 100644 new mode 100755 index e9501f83..17abf485 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -163,9 +163,6 @@ static 
unsigned long ksm_pages_unshared; /* The number of rmap_items in use: to calculate pages_volatile */ static unsigned long ksm_rmap_items; -/* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages; - /* Number of pages ksmd should scan in one batch */ static unsigned int ksm_thread_pages_to_scan = 100; @@ -628,7 +625,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, if (!ptep) goto out; - if (pte_write(*ptep)) { + if (pte_write(*ptep) || pte_dirty(*ptep)) { pte_t entry; swapped = PageSwapCache(page); @@ -651,7 +648,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, set_pte_at_notify(mm, addr, ptep, entry); goto out_unlock; } - entry = pte_wrprotect(entry); + if (pte_dirty(entry)) + set_page_dirty(page); + entry = pte_mkclean(pte_wrprotect(entry)); set_pte_at_notify(mm, addr, ptep, entry); } *orig_pte = *ptep; @@ -717,6 +716,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *oldpage, set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot)); page_remove_rmap(oldpage); + if (!page_mapped(oldpage)) + try_to_free_swap(oldpage); put_page(oldpage); pte_unmap_unlock(ptep, ptl); @@ -827,13 +828,6 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1, struct page *kpage; int err = -EFAULT; - /* - * The number of nodes in the stable tree - * is the number of kernel pages that we hold. - */ - if (ksm_max_kernel_pages && - ksm_max_kernel_pages <= ksm_pages_shared) - return err; kpage = alloc_page(GFP_HIGHUSER); if (!kpage) @@ -1209,6 +1203,18 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) slot = ksm_scan.mm_slot; if (slot == &ksm_mm_head) { + /* + * A number of pages can hang around indefinitely on per-cpu + * pagevecs, raised page count preventing write_protect_page + * from merging them. Though it doesn't really matter much, + * it is puzzling to see some stuck in pages_volatile until + * other activity jostles them out, and they also prevented + * LTP's KSM test from succeeding deterministically; so drain + * them here (here rather than on entry to ksm_do_scan(), + * so we don't IPI too often when pages_to_scan is set low). 
+ */ + lru_add_drain_all(); + root_unstable_tree = RB_ROOT; spin_lock(&ksm_mmlist_lock); @@ -1577,29 +1583,6 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, } KSM_ATTR(run); -static ssize_t max_kernel_pages_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err; - unsigned long nr_pages; - - err = strict_strtoul(buf, 10, &nr_pages); - if (err) - return -EINVAL; - - ksm_max_kernel_pages = nr_pages; - - return count; -} - -static ssize_t max_kernel_pages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", ksm_max_kernel_pages); -} -KSM_ATTR(max_kernel_pages); - static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1649,7 +1632,6 @@ static struct attribute *ksm_attrs[] = { &sleep_millisecs_attr.attr, &pages_to_scan_attr.attr, &run_attr.attr, - &max_kernel_pages_attr.attr, &pages_shared_attr.attr, &pages_sharing_attr.attr, &pages_unshared_attr.attr, @@ -1669,8 +1651,6 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; - ksm_max_kernel_pages = totalram_pages / 4; - err = ksm_slab_init(); if (err) goto out; From c95ed3371a7d0749ca59bc395640ef8a7d2b79b4 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 17 Aug 2012 01:21:55 +0800 Subject: [PATCH 099/155] staging: zram: fix zram locking Staging: zram: Replace mutex lock by a R/W semaphore Staging: zram: Add a missing GFP_KERNEL specifier in zram_init_device() --- drivers/staging/zram/Kconfig | 25 ------ drivers/staging/zram/Makefile | 0 drivers/staging/zram/xvmalloc.c | 0 drivers/staging/zram/xvmalloc.h | 0 drivers/staging/zram/xvmalloc_int.h | 0 drivers/staging/zram/zram.txt | 0 drivers/staging/zram/zram_drv.c | 134 ++++++++++++++++------------ drivers/staging/zram/zram_drv.h | 8 +- drivers/staging/zram/zram_sysfs.c | 18 ++-- 9 files changed, 95 insertions(+), 90 deletions(-) mode change 100644 => 100755 drivers/staging/zram/Kconfig mode change 100644 => 100755 drivers/staging/zram/Makefile mode change 100644 => 100755 drivers/staging/zram/xvmalloc.c mode change 100644 => 100755 drivers/staging/zram/xvmalloc.h mode change 100644 => 100755 drivers/staging/zram/xvmalloc_int.h mode change 100644 => 100755 drivers/staging/zram/zram.txt mode change 100644 => 100755 drivers/staging/zram/zram_drv.c mode change 100644 => 100755 drivers/staging/zram/zram_drv.h mode change 100644 => 100755 drivers/staging/zram/zram_sysfs.c diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig old mode 100644 new mode 100755 index 18c1a971..3bec4dba --- a/drivers/staging/zram/Kconfig +++ b/drivers/staging/zram/Kconfig @@ -21,23 +21,6 @@ config ZRAM See zram.txt for more information. Project home: http://compcache.googlecode.com/ -config ZRAM_NUM_DEVICES - int "Default number of zram devices" - depends on ZRAM - range 1 32 - default 1 - help - Select default number of zram devices. You can override this value - using 'num_devices' module parameter. - -config ZRAM_DEFAULT_PERCENTAGE - int "Default number of zram percentage" - depends on ZRAM - range 10 80 - default 25 - help - Select default zram disk size: percentage of total RAM - config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM @@ -45,11 +28,3 @@ config ZRAM_DEBUG help This option adds additional debugging code to the compressed RAM block device driver. 
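The compile-time size and percentage options removed in this Kconfig hunk are replaced by a runtime calculation: zram_set_disksize() below takes default_disksize_perc_ram percent of system RAM and page-aligns the result. A stand-alone sketch of that arithmetic; it assumes default_disksize_perc_ram keeps the old 25% default, a 4 KiB page size, and 512 MiB of RAM, all purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096ULL		/* assumed 4 KiB pages */
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* Mirrors the runtime default in zram_set_disksize(): perc% of RAM, page-aligned. */
static uint64_t default_zram_disksize(uint64_t totalram_bytes, unsigned int perc)
{
	uint64_t disksize = (uint64_t)perc * (totalram_bytes / 100);

	return disksize & PAGE_MASK;
}

int main(void)
{
	uint64_t ram = 512ULL << 20;	/* assumed 512 MiB of RAM */

	printf("default zram disksize: %llu MiB\n",
	       (unsigned long long)(default_zram_disksize(ram, 25) >> 20));
	return 0;
}

The same function also warns (but proceeds) when an explicitly requested disksize exceeds twice the RAM size, since a 2:1 compression ratio is the working assumption.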
- -config ZRAM_DEFAULT_DISKSIZE - int "Default size of zram in bytes" - depends on ZRAM - default 100663296 - help - Set default zram disk size (default ~ 96MB) - diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.h b/drivers/staging/zram/xvmalloc.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c old mode 100644 new mode 100755 index fc4c2e6f..88383651 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -104,19 +104,33 @@ static int page_zero_filled(void *ptr) return 1; } -static u64 zram_default_disksize_bytes(void) +static void zram_set_disksize(struct zram *zram, size_t totalram_bytes) { -#if 0 - return ((totalram_pages << PAGE_SHIFT) * - default_disksize_perc_ram / 100) & PAGE_MASK; -#endif - return CONFIG_ZRAM_DEFAULT_DISKSIZE; -} + if (!zram->disksize) { + pr_info( + "disk size not provided. You can use disksize_kb module " + "param to specify size.\nUsing default: (%u%% of RAM).\n", + default_disksize_perc_ram + ); + zram->disksize = default_disksize_perc_ram * + (totalram_bytes / 100); + } -static void zram_set_disksize(struct zram *zram, u64 size_bytes) -{ - zram->disksize = size_bytes; - set_capacity(zram->disk, size_bytes >> SECTOR_SHIFT); + if (zram->disksize > 2 * (totalram_bytes)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. 
Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %zu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + totalram_bytes >> 10, zram->disksize + ); + } + + zram->disksize &= PAGE_MASK; } static void zram_free_page(struct zram *zram, size_t index) @@ -546,27 +560,35 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; + if (unlikely(!zram->init_done) && zram_init_device(zram)) + goto error; + + down_read(&zram->init_lock); + if (unlikely(!zram->init_done)) + goto error_unlock; + if (!valid_io_request(zram, bio)) { zram_stat64_inc(zram, &zram->stats.invalid_io); - bio_io_error(bio); - return 0; - } - - if (unlikely(!zram->init_done) && zram_init_device(zram)) { - bio_io_error(bio); - return 0; + goto error_unlock; } __zram_make_request(zram, bio, bio_data_dir(bio)); + up_read(&zram->init_lock); return 0; + +error_unlock: + up_read(&zram->init_lock); +error: + bio_io_error(bio); + return 0; + } -void zram_reset_device(struct zram *zram) +void __zram_reset_device(struct zram *zram) { size_t index; - mutex_lock(&zram->init_lock); zram->init_done = 0; /* Free various per-device buffers */ @@ -602,8 +624,14 @@ void zram_reset_device(struct zram *zram) /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); - zram_set_disksize(zram, zram_default_disksize_bytes()); - mutex_unlock(&zram->init_lock); + zram->disksize = 0; +} + +void zram_reset_device(struct zram *zram) +{ + down_write(&zram->init_lock); + __zram_reset_device(zram); + up_write(&zram->init_lock); } int zram_init_device(struct zram *zram) @@ -611,37 +639,39 @@ int zram_init_device(struct zram *zram) int ret; size_t num_pages; - mutex_lock(&zram->init_lock); + down_write(&zram->init_lock); if (zram->init_done) { - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); return 0; } + zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); + zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); if (!zram->compress_workmem) { pr_err("Error allocating compressor working memory!\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } - zram->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); + zram->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (!zram->compress_buffer) { pr_err("Error allocating compressor buffer space\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } num_pages = zram->disksize >> PAGE_SHIFT; zram->table = vmalloc(num_pages * sizeof(*zram->table)); if (!zram->table) { pr_err("Error allocating zram address table\n"); - /* To prevent accessing table entries during cleanup */ - zram->disksize = 0; ret = -ENOMEM; - goto fail; + goto fail_no_table; } - memset(zram->table, 0, num_pages * sizeof(*zram->table)); + memset(zram->table, 0, num_pages * sizeof(*zram->table)); + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); @@ -654,15 +684,17 @@ int zram_init_device(struct zram *zram) } zram->init_done = 1; - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); pr_debug("Initialization done!\n"); return 0; +fail_no_table: + /* To prevent accessing table entries during cleanup */ + zram->disksize = 0; fail: - mutex_unlock(&zram->init_lock); - zram_reset_device(zram); - + __zram_reset_device(zram); + up_write(&zram->init_lock); pr_err("Initialization failed: err=%d\n", 
ret); return ret; } @@ -687,7 +719,7 @@ static int create_device(struct zram *zram, int device_id) int ret = 0; init_rwsem(&zram->lock); - mutex_init(&zram->init_lock); + init_rwsem(&zram->init_lock); spin_lock_init(&zram->stat64_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); @@ -718,12 +750,8 @@ static int create_device(struct zram *zram, int device_id) zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - /* - * Set some default disksize. To set another disksize, user - * must reset the device and then write a new disksize to - * corresponding device's sysfs node. - */ - zram_set_disksize(zram, zram_default_disksize_bytes()); + /* Actual capacity set using sysfs (/sys/block/zram/disksize) */ + set_capacity(zram->disk, 0); /* * To ensure that we always get PAGE_SIZE aligned @@ -768,13 +796,6 @@ static int __init zram_init(void) { int ret, dev_id; - /* - * Module parameter not specified by user. Use default - * value as defined during kernel config. - */ - if (zram_num_devices == 0) - zram_num_devices = CONFIG_ZRAM_NUM_DEVICES; - if (zram_num_devices > max_num_devices) { pr_warning("Invalid value for num_devices: %u\n", zram_num_devices); @@ -789,12 +810,15 @@ static int __init zram_init(void) goto out; } + if (!zram_num_devices) { + pr_info("num_devices not specified. Using default: 1\n"); + zram_num_devices = 1; + } + /* Allocate the device array and initialize each one */ pr_info("Creating %u devices ...\n", zram_num_devices); - zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), - GFP_KERNEL); - if (!zram_devices) - { + zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), GFP_KERNEL); + if (!zram_devices) { ret = -ENOMEM; goto unregister; } @@ -836,8 +860,8 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param_named(num_devices, zram_num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); +module_param(zram_num_devices, uint, 0); +MODULE_PARM_DESC(zram_num_devices, "Number of zram devices"); module_init(zram_init); module_exit(zram_exit); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h old mode 100644 new mode 100755 index fed0d14b..31617ee7 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -41,7 +41,7 @@ struct zobj_header { /*-- Configurable parameters */ /* Default zram disk size: 25% of total RAM */ -static const unsigned default_disksize_perc_ram = CONFIG_ZRAM_DEFAULT_PERCENTAGE; +static const unsigned default_disksize_perc_ram = 25; /* * Pages that compress to size greater than this are stored @@ -112,8 +112,8 @@ struct zram { struct request_queue *queue; struct gendisk *disk; int init_done; - /* Prevent concurrent execution of device init and reset */ - struct mutex init_lock; + /* Prevent concurrent execution of device init, reset and R/W request */ + struct rw_semaphore init_lock; /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk.
@@ -130,7 +130,7 @@ extern struct attribute_group zram_disk_attr_group; #endif extern int zram_init_device(struct zram *zram); -extern void zram_reset_device(struct zram *zram); +extern void __zram_reset_device(struct zram *zram); #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c old mode 100644 new mode 100755 index d894928a..41e51a2b --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -55,19 +55,23 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; + u64 disksize; struct zram *zram = dev_to_zram(dev); + ret = strict_strtoull(buf, 10, &disksize); + if (ret) + return ret; + + down_write(&zram->init_lock); if (zram->init_done) { + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); return -EBUSY; } - ret = strict_strtoull(buf, 10, &zram->disksize); - if (ret) - return ret; - - zram->disksize = PAGE_ALIGN(zram->disksize); + zram->disksize = PAGE_ALIGN(disksize); set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + up_write(&zram->init_lock); return len; } @@ -106,8 +110,10 @@ static ssize_t reset_store(struct device *dev, if (bdev) fsync_bdev(bdev); + down_write(&zram->init_lock); if (zram->init_done) - zram_reset_device(zram); + __zram_reset_device(zram); + up_write(&zram->init_lock); return len; } From 8b041c69af8468d32fe97236fdae7e6bd3f87cf7 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 17 Aug 2012 01:40:53 +0800 Subject: [PATCH 100/155] mm/ksm.c is doing an unneeded _notify in write_protect_page. --- mm/ksm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 17abf485..d40ed022 100755 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -645,7 +645,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * page */ if ((page_mapcount(page) + 2 + swapped) != page_count(page)) { - set_pte_at_notify(mm, addr, ptep, entry); + set_pte_at(mm, addr, ptep, entry); goto out_unlock; } if (pte_dirty(entry)) From 7c81b7476dc1b0931c47849307bf1c92445c4999 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 17 Aug 2012 01:49:14 +0800 Subject: [PATCH 101/155] ksm: check for ERR_PTR from follow_page() --- mm/ksm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index d40ed022..d8ce84ad 100755 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -314,7 +314,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, @@ -388,7 +388,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) goto out; page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) goto out; if (PageAnon(page)) { flush_anon_page(vma, page, addr); @@ -1320,7 +1320,7 @@ next_mm: static void ksm_do_scan(unsigned int scan_npages) { struct rmap_item *rmap_item; - struct page *page; + struct page *uninitialized_var(page); while (scan_npages--) { cond_resched(); From f1beec1b3240e23e026b29050f14d9632a6708b1 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 18 Aug 2012 23:45:48 +0800 Subject: [PATCH 102/155] vmalloc(): adjust gfp mask passed on nested vmalloc() invocation --- mm/vmalloc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) mode change 100644 => 100755 mm/vmalloc.c diff --git a/mm/vmalloc.c b/mm/vmalloc.c old mode 100644 new mode 
100755 index c2287313..b689e2ae --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1470,6 +1470,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, { struct page **pages; unsigned int nr_pages, array_size, i; + gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1477,13 +1478,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO, + pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, PAGE_KERNEL, node, caller); area->flags |= VM_VPAGES; } else { - pages = kmalloc_node(array_size, - (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO, - node); + pages = kmalloc_node(array_size, nested_gfp, node); } area->pages = pages; area->caller = caller; From e0c9143ea1ec510a41b347be043e98034eedf5c8 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:49:43 +0800 Subject: [PATCH 103/155] mm: cleancache core ops functions and config --- Documentation/vm/cleancache.txt | 279 ++++++++++++++++++++++++++++++++ include/linux/cleancache.h | 122 ++++++++++++++ mm/Kconfig | 22 +++ mm/Makefile | 1 + mm/cleancache.c | 244 ++++++++++++++++++++++++++++ 5 files changed, 668 insertions(+) create mode 100755 Documentation/vm/cleancache.txt create mode 100755 include/linux/cleancache.h mode change 100644 => 100755 mm/Kconfig mode change 100644 => 100755 mm/Makefile create mode 100755 mm/cleancache.c diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt new file mode 100755 index 00000000..e0a53567 --- /dev/null +++ b/Documentation/vm/cleancache.txt @@ -0,0 +1,279 @@ +MOTIVATION + +Cleancache is a new optional feature provided by the VFS layer that +potentially dramatically increases page cache effectiveness for +many workloads in many environments at a negligible cost. + +Cleancache can be thought of as a page-granularity victim cache for clean +pages that the kernel's pageframe replacement algorithm (PFRA) would like +to keep around, but can't since there isn't enough memory. So when the +PFRA "evicts" a page, it first attempts to use cleancache code to +put the data contained in that page into "transcendent memory", memory +that is not directly accessible or addressable by the kernel and is +of unknown and possibly time-varying size. + +Later, when a cleancache-enabled filesystem wishes to access a page +in a file on disk, it first checks cleancache to see if it already +contains it; if it does, the page of data is copied into the kernel +and a disk access is avoided. + +Transcendent memory "drivers" for cleancache are currently implemented +in Xen (using hypervisor memory) and zcache (using in-kernel compressed +memory) and other implementations are in development. + +FAQs are included below. + +IMPLEMENTATION OVERVIEW + +A cleancache "backend" that provides transcendent memory registers itself +to the kernel's cleancache "frontend" by calling cleancache_register_ops, +passing a pointer to a cleancache_ops structure with funcs set appropriately. +Note that cleancache_register_ops returns the previous settings so that +chaining can be performed if desired. The functions provided must conform to +certain semantics as follows: + +Most important, cleancache is "ephemeral". 
Pages which are copied into +cleancache have an indefinite lifetime which is completely unknowable +by the kernel and so may or may not still be in cleancache at any later time. +Thus, as its name implies, cleancache is not suitable for dirty pages. +Cleancache has complete discretion over what pages to preserve and what +pages to discard and when. + +Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a +pool id which, if positive, must be saved in the filesystem's superblock; +a negative return value indicates failure. A "put_page" will copy a +(presumably about-to-be-evicted) page into cleancache and associate it with +the pool id, a file key, and a page index into the file. (The combination +of a pool id, a file key, and an index is sometimes called a "handle".) +A "get_page" will copy the page, if found, from cleancache into kernel memory. +An "invalidate_page" will ensure the page no longer is present in cleancache; +an "invalidate_inode" will invalidate all pages associated with the specified +file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate +all pages in all files specified by the given pool id and also surrender +the pool id. + +An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache +to treat the pool as shared using a 128-bit UUID as a key. On systems +that may run multiple kernels (such as hard partitioned or virtualized +systems) that may share a clustered filesystem, and where cleancache +may be shared among those kernels, calls to init_shared_fs that specify the +same UUID will receive the same pool id, thus allowing the pages to +be shared. Note that any security requirements must be imposed outside +of the kernel (e.g. by "tools" that control cleancache). Or a +cleancache implementation can simply disable shared_init by always +returning a negative value. + +If a get_page is successful on a non-shared pool, the page is invalidated +(thus making cleancache an "exclusive" cache). On a shared pool, the page +is NOT invalidated on a successful get_page so that it remains accessible to +other sharers. The kernel is responsible for ensuring coherency between +cleancache (shared or not), the page cache, and the filesystem, using +cleancache invalidate operations as required. + +Note that cleancache must enforce put-put-get coherency and get-get +coherency. For the former, if two puts are made to the same handle but +with different data, say AAA by the first put and BBB by the second, a +subsequent get can never return the stale data (AAA). For get-get coherency, +if a get for a given handle fails, subsequent gets for that handle will +never succeed unless preceded by a successful put with that handle. + +Last, cleancache provides no SMP serialization guarantees; if two +different Linux threads are simultaneously putting and invalidating a page +with the same handle, the results are indeterminate. Callers must +lock the page to ensure serial behavior. + +CLEANCACHE PERFORMANCE METRICS + +Cleancache monitoring is done by sysfs files in the +/sys/kernel/mm/cleancache directory. The effectiveness of cleancache +can be measured (across all filesystems) with: + +succ_gets - number of gets that were successful +failed_gets - number of gets that failed +puts - number of puts attempted (all "succeed") +invalidates - number of invalidates attempted + +A backend implementatation may provide additional metrics. + +FAQ + +1) Where's the value? 
(Andrew Morton) + +Cleancache provides a significant performance benefit to many workloads +in many environments with negligible overhead by improving the +effectiveness of the pagecache. Clean pagecache pages are +saved in transcendent memory (RAM that is otherwise not directly +addressable to the kernel); fetching those pages later avoids "refaults" +and thus disk reads. + +Cleancache (and its sister code "frontswap") provide interfaces for +this transcendent memory (aka "tmem"), which conceptually lies between +fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. +Disallowing direct kernel or userland reads/writes to tmem +is ideal when data is transformed to a different form and size (such +as with compression) or secretly moved (as might be useful for write- +balancing for some RAM-like devices). Evicted page-cache pages (and +swap pages) are a great use for this kind of slower-than-RAM-but-much- +faster-than-disk transcendent memory, and the cleancache (and frontswap) +"page-object-oriented" specification provides a nice way to read and +write -- and indirectly "name" -- the pages. + +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources across the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to +do it well with no kernel change have essentially failed (except in some +well-publicized special-case workloads). Cleancache -- and frontswap -- +with a fairly small impact on the kernel, provide a huge amount +of flexibility for more dynamic, flexible RAM multiplexing. +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "self-ballooning"), page cache pages +are the first to go, and cleancache allows those pages to be +saved and reclaimed if overall host system memory conditions allow. + +And the identical interface used for cleancache can be used in +physical systems as well. The zcache driver acts as a memory-hungry +device that stores pages of data in a compressed state. And +the proposed "RAMster" driver shares RAM across multiple physical +systems. + +2) Why does cleancache have its sticky fingers so deep inside the + filesystems and VFS? (Andrew Morton and Christoph Hellwig) + +The core hooks for cleancache in VFS are in most cases a single line +and the minimum set are placed precisely where needed to maintain +coherency (via cleancache_invalidate operations) between cleancache, +the page cache, and disk. All hooks compile into nothingness if +cleancache is config'ed off and turn into a function-pointer- +compare-to-NULL if config'ed on but no backend claims the ops +functions, or to a compare-struct-element-to-negative if a +backend claims the ops functions but a filesystem doesn't enable +cleancache. + +Some filesystems are built entirely on top of VFS and the hooks +in VFS are sufficient, so don't require an "init_fs" hook; the +initial implementation of cleancache didn't provide this hook. +But for some filesystems (such as btrfs), the VFS hooks are +incomplete and one or more hooks in fs-specific code are required. +And for some other filesystems, such as tmpfs, cleancache may +be counterproductive. 
So it seemed prudent to require a filesystem +to "opt in" to use cleancache, which requires adding a hook in +each filesystem. Not all filesystems are supported by cleancache +only because they haven't been tested. The existing set should +be sufficient to validate the concept, the opt-in approach means +that untested filesystems are not affected, and the hooks in the +existing filesystems should make it very easy to add more +filesystems in the future. + +The total impact of the hooks to existing fs and mm files is only +about 40 lines added (not counting comments and blank lines). + +3) Why not make cleancache asynchronous and batched so it can + more easily interface with real devices with DMA instead + of copying each individual page? (Minchan Kim) + +The one-page-at-a-time copy semantics simplifies the implementation +on both the frontend and backend and also allows the backend to +do fancy things on-the-fly like page compression and +page deduplication. And since the data is "gone" (copied into/out +of the pageframe) before the cleancache get/put call returns, +a great deal of race conditions and potential coherency issues +are avoided. While the interface seems odd for a "real device" +or for real kernel-addressable RAM, it makes perfect sense for +transcendent memory. + +4) Why is non-shared cleancache "exclusive"? And where is the + page "invalidated" after a "get"? (Minchan Kim) + +The main reason is to free up space in transcendent memory and +to avoid unnecessary cleancache_invalidate calls. If you want inclusive, +the page can be "put" immediately following the "get". If +put-after-get for inclusive becomes common, the interface could +be easily extended to add a "get_no_invalidate" call. + +The invalidate is done by the cleancache backend implementation. + +5) What's the performance impact? + +Performance analysis has been presented at OLS'09 and LCA'10. +Briefly, performance gains can be significant on most workloads, +especially when memory pressure is high (e.g. when RAM is +overcommitted in a virtual workload); and because the hooks are +invoked primarily in place of or in addition to a disk read/write, +overhead is negligible even in worst case workloads. Basically +cleancache replaces I/O with memory-copy-CPU-overhead; on older +single-core systems with slow memory-copy speeds, cleancache +has little value, but in newer multicore machines, especially +consolidated/virtualized machines, it has great value. + +6) How do I add cleancache support for filesystem X? (Boaz Harrash) + +Filesystems that are well-behaved and conform to certain +restrictions can utilize cleancache simply by making a call to +cleancache_init_fs at mount time. Unusual, misbehaving, or +poorly layered filesystems must either add additional hooks +and/or undergo extensive additional testing... or should just +not enable the optional cleancache. + +Some points for a filesystem to consider: + +- The FS should be block-device-based (e.g. a ram-based FS such + as tmpfs should not enable cleancache) +- To ensure coherency/correctness, the FS must ensure that all + file removal or truncation operations either go through VFS or + add hooks to do the equivalent cleancache "invalidate" operations +- To ensure coherency/correctness, either inode numbers must + be unique across the lifetime of the on-disk file OR the + FS must provide an "encode_fh" function. +- The FS must call the VFS superblock alloc and deactivate routines + or add hooks to do the equivalent cleancache calls done there. 
+- To maximize performance, all pages fetched from the FS should + go through the do_mpage_readpage routine or the FS should add + hooks to do the equivalent (cf. btrfs) +- Currently, the FS blocksize must be the same as PAGESIZE. This + is not an architectural restriction, but no backends currently + support anything different. +- A clustered FS should invoke the "shared_init_fs" cleancache + hook to get best performance for some backends. + +7) Why not use the KVA of the inode as the key? (Christoph Hellwig) + +If cleancache would use the inode virtual address instead of +inode/filehandle, the pool id could be eliminated. But, this +won't work because cleancache retains pagecache data pages +persistently even when the inode has been pruned from the +inode unused list, and only invalidates the data page if the file +gets removed/truncated. So if cleancache used the inode kva, +there would be potential coherency issues if/when the inode +kva is reused for a different file. Alternately, if cleancache +invalidated the pages when the inode kva was freed, much of the value +of cleancache would be lost because the cache of pages in cleancache +is potentially much larger than the kernel pagecache and is most +useful if the pages survive inode cache removal. + +8) Why is a global variable required? + +The cleancache_enabled flag is checked in all of the frequently-used +cleancache hooks. The alternative is a function call to check a static +variable. Since cleancache is enabled dynamically at runtime, systems +that don't enable cleancache would suffer thousands (possibly +tens-of-thousands) of unnecessary function calls per second. So the +global variable allows cleancache to be enabled by default at compile +time, but have insignificant performance impact when cleancache remains +disabled at runtime. + +9) Does cleancache work with KVM? + +The memory model of KVM is sufficiently different that a cleancache +backend may have less value for KVM. This remains to be tested, +especially in an overcommitted system. + +10) Does cleancache work in userspace? It sounds useful for + memory hungry caches like web browsers. (Jamie Lokier) + +No plans yet, though we agree it sounds useful, at least for +apps that bypass the page cache (e.g. O_DIRECT). + +Last updated: Dan Magenheimer, April 13 2011 diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h new file mode 100755 index 00000000..04ffb2e6 --- /dev/null +++ b/include/linux/cleancache.h @@ -0,0 +1,122 @@ +#ifndef _LINUX_CLEANCACHE_H +#define _LINUX_CLEANCACHE_H + +#include +#include +#include + +#define CLEANCACHE_KEY_MAX 6 + +/* + * cleancache requires every file with a page in cleancache to have a + * unique key unless/until the file is removed/truncated.
For some + * filesystems, the inode number is unique, but for "modern" filesystems + * an exportable filehandle is required (see exportfs.h) + */ +struct cleancache_filekey { + union { + ino_t ino; + __u32 fh[CLEANCACHE_KEY_MAX]; + u32 key[CLEANCACHE_KEY_MAX]; + } u; +}; + +struct cleancache_ops { + int (*init_fs)(size_t); + int (*init_shared_fs)(char *uuid, size_t); + int (*get_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*put_page)(int, struct cleancache_filekey, + pgoff_t, struct page *); + void (*flush_page)(int, struct cleancache_filekey, pgoff_t); + void (*flush_inode)(int, struct cleancache_filekey); + void (*flush_fs)(int); +}; + +extern struct cleancache_ops + cleancache_register_ops(struct cleancache_ops *ops); +extern void __cleancache_init_fs(struct super_block *); +extern void __cleancache_init_shared_fs(char *, struct super_block *); +extern int __cleancache_get_page(struct page *); +extern void __cleancache_put_page(struct page *); +extern void __cleancache_flush_page(struct address_space *, struct page *); +extern void __cleancache_flush_inode(struct address_space *); +extern void __cleancache_flush_fs(struct super_block *); +extern int cleancache_enabled; + +#ifdef CONFIG_CLEANCACHE +static inline bool cleancache_fs_enabled(struct page *page) +{ + return page->mapping->host->i_sb->cleancache_poolid >= 0; +} +static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) +{ + return mapping->host->i_sb->cleancache_poolid >= 0; +} +#else +#define cleancache_enabled (0) +#define cleancache_fs_enabled(_page) (0) +#define cleancache_fs_enabled_mapping(_page) (0) +#endif + +/* + * The shim layer provided by these inline functions allows the compiler + * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE + * is disabled, to a single global variable check if CONFIG_CLEANCACHE + * is enabled but no cleancache "backend" has dynamically enabled it, + * and, for the most frequent cleancache ops, to a single global variable + * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled + * and a cleancache backend has dynamically enabled cleancache, but the + * filesystem referenced by that cleancache op has not enabled cleancache. + * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially + * no measurable performance impact. + */ + +static inline void cleancache_init_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_fs(sb); +} + +static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_init_shared_fs(uuid, sb); +} + +static inline int cleancache_get_page(struct page *page) +{ + int ret = -1; + + if (cleancache_enabled && cleancache_fs_enabled(page)) + ret = __cleancache_get_page(page); + return ret; +} + +static inline void cleancache_put_page(struct page *page) +{ + if (cleancache_enabled && cleancache_fs_enabled(page)) + __cleancache_put_page(page); +} + +static inline void cleancache_flush_page(struct address_space *mapping, + struct page *page) +{ + /* careful... 
page->mapping is NULL sometimes when this is called */ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_page(mapping, page); +} + +static inline void cleancache_flush_inode(struct address_space *mapping) +{ + if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) + __cleancache_flush_inode(mapping); +} + +static inline void cleancache_flush_fs(struct super_block *sb) +{ + if (cleancache_enabled) + __cleancache_flush_fs(sb); +} + +#endif /* _LINUX_CLEANCACHE_H */ diff --git a/mm/Kconfig b/mm/Kconfig old mode 100644 new mode 100755 index 2c19c0ba..f86e0d29 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -288,3 +288,25 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information. +config CLEANCACHE + bool "Enable cleancache driver to cache clean pages if tmem is present" + default n + help + Cleancache can be thought of as a page-granularity victim cache + for clean pages that the kernel's pageframe replacement algorithm + (PFRA) would like to keep around, but can't since there isn't enough + memory. So when the PFRA "evicts" a page, it first attempts to use + cleancacne code to put the data contained in that page into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. And when a cleancache-enabled + filesystem wishes to access a page in a file on disk, it first + checks cleancache to see if it already contains it; if it does, + the page is copied into the kernel and a disk access is avoided. + When a transcendent memory driver is available (such as zcache or + Xen transcendent memory), a significant I/O reduction + may be achieved. When none is available, all cleancache calls + are reduced to a single pointer-compare-against-NULL resulting + in a negligible performance hit. + + If unsure, say Y to enable cleancache \ No newline at end of file diff --git a/mm/Makefile b/mm/Makefile old mode 100644 new mode 100755 index 66f54865..82a734fd --- a/mm/Makefile +++ b/mm/Makefile @@ -46,3 +46,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o +obj-$(CONFIG_CLEANCACHE) += cleancache.o diff --git a/mm/cleancache.c b/mm/cleancache.c new file mode 100755 index 00000000..bcaae4c2 --- /dev/null +++ b/mm/cleancache.c @@ -0,0 +1,244 @@ +/* + * Cleancache frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of cleancache. See + * Documentation/vm/cleancache.txt for more information. + * + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include +#include +#include +#include +#include + +/* + * This global enablement flag may be read thousands of times per second + * by cleancache_get/put/flush even on systems where cleancache_ops + * is not claimed (e.g. cleancache is config'ed on but remains + * disabled), so is preferred to the slower alternative: a function + * call that checks a non-global. + */ +int cleancache_enabled; +EXPORT_SYMBOL(cleancache_enabled); + +/* + * cleancache_ops is set by cleancache_ops_register to contain the pointers + * to the cleancache "backend" implementation functions. 
+ */ +static struct cleancache_ops cleancache_ops; + +/* useful stats available in /sys/kernel/mm/cleancache */ +static unsigned long cleancache_succ_gets; +static unsigned long cleancache_failed_gets; +static unsigned long cleancache_puts; +static unsigned long cleancache_flushes; + +/* + * register operations for cleancache, returning previous thus allowing + * detection of multiple backends and possible nesting + */ +struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) +{ + struct cleancache_ops old = cleancache_ops; + + cleancache_ops = *ops; + cleancache_enabled = 1; + return old; +} +EXPORT_SYMBOL(cleancache_register_ops); + +/* Called by a cleancache-enabled filesystem at time of mount */ +void __cleancache_init_fs(struct super_block *sb) +{ + sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_fs); + +/* Called by a cleancache-enabled clustered filesystem at time of mount */ +void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) +{ + sb->cleancache_poolid = + (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); +} +EXPORT_SYMBOL(__cleancache_init_shared_fs); + +/* + * If the filesystem uses exportable filehandles, use the filehandle as + * the key, else use the inode number. + */ +static int cleancache_get_key(struct inode *inode, + struct cleancache_filekey *key) +{ + int (*fhfn)(struct dentry *, __u32 *fh, int *, int); + int len = 0, maxlen = CLEANCACHE_KEY_MAX; + struct super_block *sb = inode->i_sb; + + key->u.ino = inode->i_ino; + if (sb->s_export_op != NULL) { + fhfn = sb->s_export_op->encode_fh; + if (fhfn) { + struct dentry d; + d.d_inode = inode; + len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); + if (len <= 0 || len == 255) + return -1; + if (maxlen > CLEANCACHE_KEY_MAX) + return -1; + } + } + return 0; +} + +/* + * "Get" data from cleancache associated with the poolid/inode/index + * that were specified when the data was put to cleanache and, if + * successful, use it to fill the specified page with data and return 0. + * The pageframe is unchanged and returns -1 if the get fails. + * Page must be locked by caller. + */ +int __cleancache_get_page(struct page *page) +{ + int ret = -1; + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id < 0) + goto out; + + if (cleancache_get_key(page->mapping->host, &key) < 0) + goto out; + + ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); + if (ret == 0) + cleancache_succ_gets++; + else + cleancache_failed_gets++; +out: + return ret; +} +EXPORT_SYMBOL(__cleancache_get_page); + +/* + * "Put" data from a page to cleancache and associate it with the + * (previously-obtained per-filesystem) poolid and the page's, + * inode and page index. Page must be locked. Note that a put_page + * always "succeeds", though a subsequent get_page may succeed or fail. + */ +void __cleancache_put_page(struct page *page) +{ + int pool_id; + struct cleancache_filekey key = { .u.key = { 0 } }; + + VM_BUG_ON(!PageLocked(page)); + pool_id = page->mapping->host->i_sb->cleancache_poolid; + if (pool_id >= 0 && + cleancache_get_key(page->mapping->host, &key) >= 0) { + (*cleancache_ops.put_page)(pool_id, key, page->index, page); + cleancache_puts++; + } +} +EXPORT_SYMBOL(__cleancache_put_page); + +/* + * Flush any data from cleancache associated with the poolid and the + * page's inode and page index so that a subsequent "get" will fail. 
+ */ +void __cleancache_flush_page(struct address_space *mapping, struct page *page) +{ + /* careful... page->mapping is NULL sometimes when this is called */ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0) { + VM_BUG_ON(!PageLocked(page)); + if (cleancache_get_key(mapping->host, &key) >= 0) { + (*cleancache_ops.flush_page)(pool_id, key, page->index); + cleancache_flushes++; + } + } +} +EXPORT_SYMBOL(__cleancache_flush_page); + +/* + * Flush all data from cleancache associated with the poolid and the + * mappings's inode so that all subsequent gets to this poolid/inode + * will fail. + */ +void __cleancache_flush_inode(struct address_space *mapping) +{ + int pool_id = mapping->host->i_sb->cleancache_poolid; + struct cleancache_filekey key = { .u.key = { 0 } }; + + if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) + (*cleancache_ops.flush_inode)(pool_id, key); +} +EXPORT_SYMBOL(__cleancache_flush_inode); + +/* + * Called by any cleancache-enabled filesystem at time of unmount; + * note that pool_id is surrendered and may be reutrned by a subsequent + * cleancache_init_fs or cleancache_init_shared_fs + */ +void __cleancache_flush_fs(struct super_block *sb) +{ + if (sb->cleancache_poolid >= 0) { + int old_poolid = sb->cleancache_poolid; + sb->cleancache_poolid = -1; + (*cleancache_ops.flush_fs)(old_poolid); + } +} +EXPORT_SYMBOL(__cleancache_flush_fs); + +#ifdef CONFIG_SYSFS + +/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */ + +#define CLEANCACHE_SYSFS_RO(_name) \ + static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%lu\n", cleancache_##_name); \ + } \ + static struct kobj_attribute cleancache_##_name##_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = cleancache_##_name##_show, \ + } + +CLEANCACHE_SYSFS_RO(succ_gets); +CLEANCACHE_SYSFS_RO(failed_gets); +CLEANCACHE_SYSFS_RO(puts); +CLEANCACHE_SYSFS_RO(flushes); + +static struct attribute *cleancache_attrs[] = { + &cleancache_succ_gets_attr.attr, + &cleancache_failed_gets_attr.attr, + &cleancache_puts_attr.attr, + &cleancache_flushes_attr.attr, + NULL, +}; + +static struct attribute_group cleancache_attr_group = { + .attrs = cleancache_attrs, + .name = "cleancache", +}; + +#endif /* CONFIG_SYSFS */ + +static int __init init_cleancache(void) +{ +#ifdef CONFIG_SYSFS + int err; + + err = sysfs_create_group(mm_kobj, &cleancache_attr_group); +#endif /* CONFIG_SYSFS */ + return 0; +} +module_init(init_cleancache) From 8eb6724dbfb99bb1f17f3192483fafc1f9eb73fe Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:51:06 +0800 Subject: [PATCH 104/155] add zcache --- drivers/staging/Kconfig | 1 + drivers/staging/Makefile | 1 + drivers/staging/zcache/Kconfig | 13 + drivers/staging/zcache/Makefile | 3 + drivers/staging/zcache/tmem.c | 710 +++++++++++++ drivers/staging/zcache/tmem.h | 195 ++++ drivers/staging/zcache/zcache.c | 1658 +++++++++++++++++++++++++++++++ 7 files changed, 2581 insertions(+) create mode 100755 drivers/staging/zcache/Kconfig create mode 100755 drivers/staging/zcache/Makefile create mode 100755 drivers/staging/zcache/tmem.c create mode 100755 drivers/staging/zcache/tmem.h create mode 100755 drivers/staging/zcache/zcache.c diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 8ee4bfa6..e4c3c9dd 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,5 +125,6 @@ source 
"drivers/staging/iio/Kconfig" source "drivers/staging/zram/Kconfig" +source "drivers/staging/zcache/Kconfig" endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 5a1b7341..5f0f554b 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -45,4 +45,5 @@ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_XVMALLOC) += zram/ +obj-$(CONFIG_ZCACHE) += zcache/ diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig new file mode 100755 index 00000000..7fabcb2b --- /dev/null +++ b/drivers/staging/zcache/Kconfig @@ -0,0 +1,13 @@ +config ZCACHE + tristate "Dynamic compression of swap pages and clean pagecache pages" + depends on CLEANCACHE || FRONTSWAP + select XVMALLOC + select LZO_COMPRESS + select LZO_DECOMPRESS + default n + help + Zcache doubles RAM efficiency while providing a significant + performance boosts on many workloads. Zcache uses lzo1x + compression and an in-kernel implementation of transcendent + memory to store clean page cache pages and swap in RAM, + providing a noticeable reduction in disk I/O. diff --git a/drivers/staging/zcache/Makefile b/drivers/staging/zcache/Makefile new file mode 100755 index 00000000..f5ec64f9 --- /dev/null +++ b/drivers/staging/zcache/Makefile @@ -0,0 +1,3 @@ +zcache-y := tmem.o + +obj-$(CONFIG_ZCACHE) += zcache.o diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c new file mode 100755 index 00000000..e954d405 --- /dev/null +++ b/drivers/staging/zcache/tmem.c @@ -0,0 +1,710 @@ +/* + * In-kernel transcendent memory (generic implementation) + * + * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. + * + * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented + * "handles" (triples containing a pool id, and object id, and an index), to + * pages in a page-accessible memory (PAM). Tmem references the PAM pages via + * an abstract "pampd" (PAM page-descriptor), which can be operated on by a + * set of functions (pamops). Each pampd contains some representation of + * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of + * pages and must be able to insert, find, and delete these pages at a + * potential frequency of thousands per second concurrently across many CPUs, + * (and, if used with KVM, across many vcpus across many guests). + * Tmem is tracked with a hierarchy of data structures, organized by + * the elements in a handle-tuple: pool_id, object_id, and page index. + * One or more "clients" (e.g. guests) each provide one or more tmem_pools. + * Each pool, contains a hash table of rb_trees of tmem_objs. Each + * tmem_obj contains a radix-tree-like tree of pointers, with intermediate + * nodes called tmem_objnodes. Each leaf pointer in this tree points to + * a pampd, which is accessible only through a small set of callbacks + * registered by the PAM implementation (see tmem_register_pamops). Tmem + * does all memory allocation via a set of callbacks registered by the tmem + * host implementation (e.g. see tmem_register_hostops). + */ + +#include +#include +#include + +#include "tmem.h" + +/* data structure sentinels used for debugging... see tmem.h */ +#define POOL_SENTINEL 0x87658765 +#define OBJ_SENTINEL 0x12345678 +#define OBJNODE_SENTINEL 0xfedcba09 + +/* + * A tmem host implementation must use this function to register callbacks + * for memory allocation. 
+ */ +static struct tmem_hostops tmem_hostops; + +static void tmem_objnode_tree_init(void); + +void tmem_register_hostops(struct tmem_hostops *m) +{ + tmem_objnode_tree_init(); + tmem_hostops = *m; +} + +/* + * A tmem host implementation must use this function to register + * callbacks for a page-accessible memory (PAM) implementation + */ +static struct tmem_pamops tmem_pamops; + +void tmem_register_pamops(struct tmem_pamops *m) +{ + tmem_pamops = *m; +} + +/* + * Oid's are potentially very sparse and tmem_objs may have an indeterminately + * short life, being added and deleted at a relatively high frequency. + * So an rb_tree is an ideal data structure to manage tmem_objs. But because + * of the potentially huge number of tmem_objs, each pool manages a hashtable + * of rb_trees to reduce search, insert, delete, and rebalancing time. + * Each hashbucket also has a lock to manage concurrent access. + * + * The following routines manage tmem_objs. When any tmem_obj is accessed, + * the hashbucket lock must be held. + */ + +/* searches for object==oid in pool, returns locked object if found */ +static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb, + struct tmem_oid *oidp) +{ + struct rb_node *rbnode; + struct tmem_obj *obj; + + rbnode = hb->obj_rb_root.rb_node; + while (rbnode) { + BUG_ON(RB_EMPTY_NODE(rbnode)); + obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); + switch (tmem_oid_compare(oidp, &obj->oid)) { + case 0: /* equal */ + goto out; + case -1: + rbnode = rbnode->rb_left; + break; + case 1: + rbnode = rbnode->rb_right; + break; + } + } + obj = NULL; +out: + return obj; +} + +static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *); + +/* free an object that has no more pampds in it */ +static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb) +{ + struct tmem_pool *pool; + + BUG_ON(obj == NULL); + ASSERT_SENTINEL(obj, OBJ); + BUG_ON(obj->pampd_count > 0); + pool = obj->pool; + BUG_ON(pool == NULL); + if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */ + tmem_pampd_destroy_all_in_obj(obj); + BUG_ON(obj->objnode_tree_root != NULL); + BUG_ON((long)obj->objnode_count != 0); + atomic_dec(&pool->obj_count); + BUG_ON(atomic_read(&pool->obj_count) < 0); + INVERT_SENTINEL(obj, OBJ); + obj->pool = NULL; + tmem_oid_set_invalid(&obj->oid); + rb_erase(&obj->rb_tree_node, &hb->obj_rb_root); +} + +/* + * initialize, and insert an tmem_object_root (called only if find failed) + */ +static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, + struct tmem_pool *pool, + struct tmem_oid *oidp) +{ + struct rb_root *root = &hb->obj_rb_root; + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct tmem_obj *this; + + BUG_ON(pool == NULL); + atomic_inc(&pool->obj_count); + obj->objnode_tree_height = 0; + obj->objnode_tree_root = NULL; + obj->pool = pool; + obj->oid = *oidp; + obj->objnode_count = 0; + obj->pampd_count = 0; + SET_SENTINEL(obj, OBJ); + while (*new) { + BUG_ON(RB_EMPTY_NODE(*new)); + this = rb_entry(*new, struct tmem_obj, rb_tree_node); + parent = *new; + switch (tmem_oid_compare(oidp, &this->oid)) { + case 0: + BUG(); /* already present; should never happen! */ + break; + case -1: + new = &(*new)->rb_left; + break; + case 1: + new = &(*new)->rb_right; + break; + } + } + rb_link_node(&obj->rb_tree_node, parent, new); + rb_insert_color(&obj->rb_tree_node, root); +} + +/* + * Tmem is managed as a set of tmem_pools with certain attributes, such as + * "ephemeral" vs "persistent". 
These attributes apply to all tmem_objs + * and all pampds that belong to a tmem_pool. A tmem_pool is created + * or deleted relatively rarely (for example, when a filesystem is + * mounted or unmounted. + */ + +/* flush all data from a pool and, optionally, free it */ +static void tmem_pool_flush(struct tmem_pool *pool, bool destroy) +{ + struct rb_node *rbnode; + struct tmem_obj *obj; + struct tmem_hashbucket *hb = &pool->hashbucket[0]; + int i; + + BUG_ON(pool == NULL); + for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { + spin_lock(&hb->lock); + rbnode = rb_first(&hb->obj_rb_root); + while (rbnode != NULL) { + obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); + rbnode = rb_next(rbnode); + tmem_pampd_destroy_all_in_obj(obj); + tmem_obj_free(obj, hb); + (*tmem_hostops.obj_free)(obj, pool); + } + spin_unlock(&hb->lock); + } + if (destroy) + list_del(&pool->pool_list); +} + +/* + * A tmem_obj contains a radix-tree-like tree in which the intermediate + * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation + * is very specialized and tuned for specific uses and is not particularly + * suited for use from this code, though some code from the core algorithms has + * been reused, thus the copyright notices below). Each tmem_objnode contains + * a set of pointers which point to either a set of intermediate tmem_objnodes + * or a set of of pampds. + * + * Portions Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * Portions Copyright (C) 2005 SGI, Christoph Lameter + */ + +struct tmem_objnode_tree_path { + struct tmem_objnode *objnode; + int offset; +}; + +/* objnode height_to_maxindex translation */ +static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1]; + +static void tmem_objnode_tree_init(void) +{ + unsigned int ht, tmp; + + for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) { + tmp = ht * OBJNODE_TREE_MAP_SHIFT; + if (tmp >= OBJNODE_TREE_INDEX_BITS) + tmem_objnode_tree_h2max[ht] = ~0UL; + else + tmem_objnode_tree_h2max[ht] = + (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1; + } +} + +static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj) +{ + struct tmem_objnode *objnode; + + ASSERT_SENTINEL(obj, OBJ); + BUG_ON(obj->pool == NULL); + ASSERT_SENTINEL(obj->pool, POOL); + objnode = (*tmem_hostops.objnode_alloc)(obj->pool); + if (unlikely(objnode == NULL)) + goto out; + objnode->obj = obj; + SET_SENTINEL(objnode, OBJNODE); + memset(&objnode->slots, 0, sizeof(objnode->slots)); + objnode->slots_in_use = 0; + obj->objnode_count++; +out: + return objnode; +} + +static void tmem_objnode_free(struct tmem_objnode *objnode) +{ + struct tmem_pool *pool; + int i; + + BUG_ON(objnode == NULL); + for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) + BUG_ON(objnode->slots[i] != NULL); + ASSERT_SENTINEL(objnode, OBJNODE); + INVERT_SENTINEL(objnode, OBJNODE); + BUG_ON(objnode->obj == NULL); + ASSERT_SENTINEL(objnode->obj, OBJ); + pool = objnode->obj->pool; + BUG_ON(pool == NULL); + ASSERT_SENTINEL(pool, POOL); + objnode->obj->objnode_count--; + objnode->obj = NULL; + (*tmem_hostops.objnode_free)(objnode, pool); +} + +/* + * lookup index in object and return associated pampd (or NULL if not found) + */ +static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) +{ + unsigned int height, shift; + struct tmem_objnode **slot = NULL; + + BUG_ON(obj == NULL); + ASSERT_SENTINEL(obj, OBJ); + BUG_ON(obj->pool == NULL); + ASSERT_SENTINEL(obj->pool, POOL); + + height = obj->objnode_tree_height; + if (index 
> tmem_objnode_tree_h2max[obj->objnode_tree_height]) + goto out; + if (height == 0 && obj->objnode_tree_root) { + slot = &obj->objnode_tree_root; + goto out; + } + shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; + slot = &obj->objnode_tree_root; + while (height > 0) { + if (*slot == NULL) + goto out; + slot = (struct tmem_objnode **) + ((*slot)->slots + + ((index >> shift) & OBJNODE_TREE_MAP_MASK)); + shift -= OBJNODE_TREE_MAP_SHIFT; + height--; + } +out: + return slot != NULL ? *slot : NULL; +} + +static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, + void *pampd) +{ + int ret = 0; + struct tmem_objnode *objnode = NULL, *newnode, *slot; + unsigned int height, shift; + int offset = 0; + + /* if necessary, extend the tree to be higher */ + if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) { + height = obj->objnode_tree_height + 1; + if (index > tmem_objnode_tree_h2max[height]) + while (index > tmem_objnode_tree_h2max[height]) + height++; + if (obj->objnode_tree_root == NULL) { + obj->objnode_tree_height = height; + goto insert; + } + do { + newnode = tmem_objnode_alloc(obj); + if (!newnode) { + ret = -ENOMEM; + goto out; + } + newnode->slots[0] = obj->objnode_tree_root; + newnode->slots_in_use = 1; + obj->objnode_tree_root = newnode; + obj->objnode_tree_height++; + } while (height > obj->objnode_tree_height); + } +insert: + slot = obj->objnode_tree_root; + height = obj->objnode_tree_height; + shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; + while (height > 0) { + if (slot == NULL) { + /* add a child objnode. */ + slot = tmem_objnode_alloc(obj); + if (!slot) { + ret = -ENOMEM; + goto out; + } + if (objnode) { + + objnode->slots[offset] = slot; + objnode->slots_in_use++; + } else + obj->objnode_tree_root = slot; + } + /* go down a level */ + offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; + objnode = slot; + slot = objnode->slots[offset]; + shift -= OBJNODE_TREE_MAP_SHIFT; + height--; + } + BUG_ON(slot != NULL); + if (objnode) { + objnode->slots_in_use++; + objnode->slots[offset] = pampd; + } else + obj->objnode_tree_root = pampd; + obj->pampd_count++; +out: + return ret; +} + +static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index) +{ + struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1]; + struct tmem_objnode_tree_path *pathp = path; + struct tmem_objnode *slot = NULL; + unsigned int height, shift; + int offset; + + BUG_ON(obj == NULL); + ASSERT_SENTINEL(obj, OBJ); + BUG_ON(obj->pool == NULL); + ASSERT_SENTINEL(obj->pool, POOL); + height = obj->objnode_tree_height; + if (index > tmem_objnode_tree_h2max[height]) + goto out; + slot = obj->objnode_tree_root; + if (height == 0 && obj->objnode_tree_root) { + obj->objnode_tree_root = NULL; + goto out; + } + shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT; + pathp->objnode = NULL; + do { + if (slot == NULL) + goto out; + pathp++; + offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; + pathp->offset = offset; + pathp->objnode = slot; + slot = slot->slots[offset]; + shift -= OBJNODE_TREE_MAP_SHIFT; + height--; + } while (height > 0); + if (slot == NULL) + goto out; + while (pathp->objnode) { + pathp->objnode->slots[pathp->offset] = NULL; + pathp->objnode->slots_in_use--; + if (pathp->objnode->slots_in_use) { + if (pathp->objnode == obj->objnode_tree_root) { + while (obj->objnode_tree_height > 0 && + obj->objnode_tree_root->slots_in_use == 1 && + obj->objnode_tree_root->slots[0]) { + struct tmem_objnode *to_free = + obj->objnode_tree_root; + + obj->objnode_tree_root = + to_free->slots[0]; + 
obj->objnode_tree_height--; + to_free->slots[0] = NULL; + to_free->slots_in_use = 0; + tmem_objnode_free(to_free); + } + } + goto out; + } + tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */ + pathp--; + } + obj->objnode_tree_height = 0; + obj->objnode_tree_root = NULL; + +out: + if (slot != NULL) + obj->pampd_count--; + BUG_ON(obj->pampd_count < 0); + return slot; +} + +/* recursively walk the objnode_tree destroying pampds and objnodes */ +static void tmem_objnode_node_destroy(struct tmem_obj *obj, + struct tmem_objnode *objnode, + unsigned int ht) +{ + int i; + + if (ht == 0) + return; + for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) { + if (objnode->slots[i]) { + if (ht == 1) { + obj->pampd_count--; + (*tmem_pamops.free)(objnode->slots[i], + obj->pool); + objnode->slots[i] = NULL; + continue; + } + tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1); + tmem_objnode_free(objnode->slots[i]); + objnode->slots[i] = NULL; + } + } +} + +static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) +{ + if (obj->objnode_tree_root == NULL) + return; + if (obj->objnode_tree_height == 0) { + obj->pampd_count--; + (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool); + } else { + tmem_objnode_node_destroy(obj, obj->objnode_tree_root, + obj->objnode_tree_height); + tmem_objnode_free(obj->objnode_tree_root); + obj->objnode_tree_height = 0; + } + obj->objnode_tree_root = NULL; +} + +/* + * Tmem is operated on by a set of well-defined actions: + * "put", "get", "flush", "flush_object", "new pool" and "destroy pool". + * (The tmem ABI allows for subpages and exchanges but these operations + * are not included in this implementation.) + * + * These "tmem core" operations are implemented in the following functions. + */ + +/* + * "Put" a page, e.g. copy a page from the kernel into newly allocated + * PAM space (if such space is available). Tmem_put is complicated by + * a corner case: What if a page with matching handle already exists in + * tmem? To guarantee coherency, one of two actions is necessary: Either + * the data for the page must be overwritten, or the page must be + * "flushed" so that the data is not accessible to a subsequent "get". + * Since these "duplicate puts" are relatively rare, this implementation + * always flushes for simplicity. 
+ */ +int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, + struct page *page) +{ + struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL; + void *pampd = NULL, *pampd_del = NULL; + int ret = -ENOMEM; + bool ephemeral; + struct tmem_hashbucket *hb; + + ephemeral = is_ephemeral(pool); + hb = &pool->hashbucket[tmem_oid_hash(oidp)]; + spin_lock(&hb->lock); + obj = objfound = tmem_obj_find(hb, oidp); + if (obj != NULL) { + pampd = tmem_pampd_lookup_in_obj(objfound, index); + if (pampd != NULL) { + /* if found, is a dup put, flush the old one */ + pampd_del = tmem_pampd_delete_from_obj(obj, index); + BUG_ON(pampd_del != pampd); + (*tmem_pamops.free)(pampd, pool); + if (obj->pampd_count == 0) { + objnew = obj; + objfound = NULL; + } + pampd = NULL; + } + } else { + obj = objnew = (*tmem_hostops.obj_alloc)(pool); + if (unlikely(obj == NULL)) { + ret = -ENOMEM; + goto out; + } + tmem_obj_init(obj, hb, pool, oidp); + } + BUG_ON(obj == NULL); + BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound)); + pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page); + if (unlikely(pampd == NULL)) + goto free; + ret = tmem_pampd_add_to_obj(obj, index, pampd); + if (unlikely(ret == -ENOMEM)) + /* may have partially built objnode tree ("stump") */ + goto delete_and_free; + goto out; + +delete_and_free: + (void)tmem_pampd_delete_from_obj(obj, index); +free: + if (pampd) + (*tmem_pamops.free)(pampd, pool); + if (objnew) { + tmem_obj_free(objnew, hb); + (*tmem_hostops.obj_free)(objnew, pool); + } +out: + spin_unlock(&hb->lock); + return ret; +} + +/* + * "Get" a page, e.g. if one can be found, copy the tmem page with the + * matching handle from PAM space to the kernel. By tmem definition, + * when a "get" is successful on an ephemeral page, the page is "flushed", + * and when a "get" is successful on a persistent page, the page is retained + * in tmem. Note that to preserve + * coherency, "get" can never be skipped if tmem contains the data. + * That is, if a get is done with a certain handle and fails, any + * subsequent "get" must also fail (unless of course there is a + * "put" done with the same handle). + + */ +int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, + uint32_t index, struct page *page) +{ + struct tmem_obj *obj; + void *pampd; + bool ephemeral = is_ephemeral(pool); + uint32_t ret = -1; + struct tmem_hashbucket *hb; + + hb = &pool->hashbucket[tmem_oid_hash(oidp)]; + spin_lock(&hb->lock); + obj = tmem_obj_find(hb, oidp); + if (obj == NULL) + goto out; + ephemeral = is_ephemeral(pool); + if (ephemeral) + pampd = tmem_pampd_delete_from_obj(obj, index); + else + pampd = tmem_pampd_lookup_in_obj(obj, index); + if (pampd == NULL) + goto out; + ret = (*tmem_pamops.get_data)(page, pampd, pool); + if (ret < 0) + goto out; + if (ephemeral) { + (*tmem_pamops.free)(pampd, pool); + if (obj->pampd_count == 0) { + tmem_obj_free(obj, hb); + (*tmem_hostops.obj_free)(obj, pool); + obj = NULL; + } + } + ret = 0; +out: + spin_unlock(&hb->lock); + return ret; +} + +/* + * If a page in tmem matches the handle, "flush" this page from tmem such + * that any subsequent "get" does not succeed (unless, of course, there + * was another "put" with the same handle). 
+ */ +int tmem_flush_page(struct tmem_pool *pool, + struct tmem_oid *oidp, uint32_t index) +{ + struct tmem_obj *obj; + void *pampd; + int ret = -1; + struct tmem_hashbucket *hb; + + hb = &pool->hashbucket[tmem_oid_hash(oidp)]; + spin_lock(&hb->lock); + obj = tmem_obj_find(hb, oidp); + if (obj == NULL) + goto out; + pampd = tmem_pampd_delete_from_obj(obj, index); + if (pampd == NULL) + goto out; + (*tmem_pamops.free)(pampd, pool); + if (obj->pampd_count == 0) { + tmem_obj_free(obj, hb); + (*tmem_hostops.obj_free)(obj, pool); + } + ret = 0; + +out: + spin_unlock(&hb->lock); + return ret; +} + +/* + * "Flush" all pages in tmem matching this oid. + */ +int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp) +{ + struct tmem_obj *obj; + struct tmem_hashbucket *hb; + int ret = -1; + + hb = &pool->hashbucket[tmem_oid_hash(oidp)]; + spin_lock(&hb->lock); + obj = tmem_obj_find(hb, oidp); + if (obj == NULL) + goto out; + tmem_pampd_destroy_all_in_obj(obj); + tmem_obj_free(obj, hb); + (*tmem_hostops.obj_free)(obj, pool); + ret = 0; + +out: + spin_unlock(&hb->lock); + return ret; +} + +/* + * "Flush" all pages (and tmem_objs) from this tmem_pool and disable + * all subsequent access to this tmem_pool. + */ +int tmem_destroy_pool(struct tmem_pool *pool) +{ + int ret = -1; + + if (pool == NULL) + goto out; + tmem_pool_flush(pool, 1); + ret = 0; +out: + return ret; +} + +static LIST_HEAD(tmem_global_pool_list); + +/* + * Create a new tmem_pool with the provided flag and return + * a pool id provided by the tmem host implementation. + */ +void tmem_new_pool(struct tmem_pool *pool, uint32_t flags) +{ + int persistent = flags & TMEM_POOL_PERSIST; + int shared = flags & TMEM_POOL_SHARED; + struct tmem_hashbucket *hb = &pool->hashbucket[0]; + int i; + + for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { + hb->obj_rb_root = RB_ROOT; + spin_lock_init(&hb->lock); + } + INIT_LIST_HEAD(&pool->pool_list); + atomic_set(&pool->obj_count, 0); + SET_SENTINEL(pool, POOL); + list_add_tail(&pool->pool_list, &tmem_global_pool_list); + pool->persistent = persistent; + pool->shared = shared; +} diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h new file mode 100755 index 00000000..2e07e217 --- /dev/null +++ b/drivers/staging/zcache/tmem.h @@ -0,0 +1,195 @@ +/* + * tmem.h + * + * Transcendent memory + * + * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. + */ + +#ifndef _TMEM_H_ +#define _TMEM_H_ + +#include +#include +#include +#include + +/* + * These are pre-defined by the Xen<->Linux ABI + */ +#define TMEM_PUT_PAGE 4 +#define TMEM_GET_PAGE 5 +#define TMEM_FLUSH_PAGE 6 +#define TMEM_FLUSH_OBJECT 7 +#define TMEM_POOL_PERSIST 1 +#define TMEM_POOL_SHARED 2 +#define TMEM_POOL_PRECOMPRESSED 4 +#define TMEM_POOL_PAGESIZE_SHIFT 4 +#define TMEM_POOL_PAGESIZE_MASK 0xf +#define TMEM_POOL_RESERVED_BITS 0x00ffff00 + +/* + * sentinels have proven very useful for debugging but can be removed + * or disabled before final merge. 
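+ * In short: SET_SENTINEL() stamps a live structure, ASSERT_SENTINEL()
+ * WARN_ONs when the stamp is missing, and INVERT_SENTINEL() marks a freed
+ * structure so that ASSERT_INVERTED_SENTINEL() can confirm a recycled
+ * object really went through the free path first.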
+ */ +#define SENTINELS +#ifdef SENTINELS +#define DECL_SENTINEL uint32_t sentinel; +#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL) +#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL) +#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL) +#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL) +#else +#define DECL_SENTINEL +#define SET_SENTINEL(_x, _y) do { } while (0) +#define INVERT_SENTINEL(_x, _y) do { } while (0) +#define ASSERT_SENTINEL(_x, _y) do { } while (0) +#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0) +#endif + +#define ASSERT_SPINLOCK(_l) WARN_ON(!spin_is_locked(_l)) + +/* + * A pool is the highest-level data structure managed by tmem and + * usually corresponds to a large independent set of pages such as + * a filesystem. Each pool has an id, and certain attributes and counters. + * It also contains a set of hash buckets, each of which contains an rbtree + * of objects and a lock to manage concurrency within the pool. + */ + +#define TMEM_HASH_BUCKET_BITS 8 +#define TMEM_HASH_BUCKETS (1<persistent) +#define is_ephemeral(_p) (!(_p->persistent)) + +/* + * An object id ("oid") is large: 192-bits (to ensure, for example, files + * in a modern filesystem can be uniquely identified). + */ + +struct tmem_oid { + uint64_t oid[3]; +}; + +static inline void tmem_oid_set_invalid(struct tmem_oid *oidp) +{ + oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; +} + +static inline bool tmem_oid_valid(struct tmem_oid *oidp) +{ + return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL || + oidp->oid[2] != -1UL; +} + +static inline int tmem_oid_compare(struct tmem_oid *left, + struct tmem_oid *right) +{ + int ret; + + if (left->oid[2] == right->oid[2]) { + if (left->oid[1] == right->oid[1]) { + if (left->oid[0] == right->oid[0]) + ret = 0; + else if (left->oid[0] < right->oid[0]) + ret = -1; + else + return 1; + } else if (left->oid[1] < right->oid[1]) + ret = -1; + else + ret = 1; + } else if (left->oid[2] < right->oid[2]) + ret = -1; + else + ret = 1; + return ret; +} + +static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) +{ + return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], + TMEM_HASH_BUCKET_BITS); +} + +/* + * A tmem_obj contains an identifier (oid), pointers to the parent + * pool and the rb_tree to which it belongs, counters, and an ordered + * set of pampds, structured in a radix-tree-like tree. The intermediate + * nodes of the tree are called tmem_objnodes. 
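+ * With OBJNODE_TREE_MAP_SHIFT == 6 each tmem_objnode holds 64 slots, so a
+ * full 32-bit page index is covered by a tree at most six levels deep.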
+ */ + +struct tmem_objnode; + +struct tmem_obj { + struct tmem_oid oid; + struct tmem_pool *pool; + struct rb_node rb_tree_node; + struct tmem_objnode *objnode_tree_root; + unsigned int objnode_tree_height; + unsigned long objnode_count; + long pampd_count; + DECL_SENTINEL +}; + +#define OBJNODE_TREE_MAP_SHIFT 6 +#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT) +#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1) +#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define OBJNODE_TREE_MAX_PATH \ + (OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2) + +struct tmem_objnode { + struct tmem_obj *obj; + DECL_SENTINEL + void *slots[OBJNODE_TREE_MAP_SIZE]; + unsigned int slots_in_use; +}; + +/* pampd abstract datatype methods provided by the PAM implementation */ +struct tmem_pamops { + void *(*create)(struct tmem_pool *, struct tmem_oid *, uint32_t, + struct page *); + int (*get_data)(struct page *, void *, struct tmem_pool *); + void (*free)(void *, struct tmem_pool *); +}; +extern void tmem_register_pamops(struct tmem_pamops *m); + +/* memory allocation methods provided by the host implementation */ +struct tmem_hostops { + struct tmem_obj *(*obj_alloc)(struct tmem_pool *); + void (*obj_free)(struct tmem_obj *, struct tmem_pool *); + struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *); + void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *); +}; +extern void tmem_register_hostops(struct tmem_hostops *m); + +/* core tmem accessor functions */ +extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index, + struct page *page); +extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index, + struct page *page); +extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, + uint32_t index); +extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); +extern int tmem_destroy_pool(struct tmem_pool *); +extern void tmem_new_pool(struct tmem_pool *, uint32_t); +#endif /* _TMEM_H */ diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c new file mode 100755 index 00000000..b8a2b30a --- /dev/null +++ b/drivers/staging/zcache/zcache.c @@ -0,0 +1,1658 @@ +/* + * zcache.c + * + * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp. + * Copyright (c) 2010,2011, Nitin Gupta + * + * Zcache provides an in-kernel "host implementation" for transcendent memory + * and, thus indirectly, for cleancache and frontswap. Zcache includes two + * page-accessible memory [1] interfaces, both utilizing lzo1x compression: + * 1) "compression buddies" ("zbud") is used for ephemeral pages + * 2) xvmalloc is used for persistent pages. + * Xvmalloc (based on the TLSF allocator) has very low fragmentation + * so maximizes space efficiency, while zbud allows pairs (and potentially, + * in the future, more than a pair of) compressed pages to be closely linked + * so that reclaiming can be done via the kernel's physical-page-oriented + * "shrinker" interface. 
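+ *
+ * (In this driver "ephemeral" effectively means cleancache pools, whose
+ * pages the kernel may drop at any time, so zbud keeps whole pageframes
+ * easy to reclaim; "persistent" means frontswap pools, whose pages must be
+ * retained, so xvmalloc's denser packing wins over easy reclaim.)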
+ * + * [1] For a definition of page-accessible memory (aka PAM), see: + * http://marc.info/?l=linux-mm&m=127811271605009 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "tmem.h" + +#include "../zram/xvmalloc.h" /* if built in drivers/staging */ + +#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP)) +#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP" +#endif +#ifdef CONFIG_CLEANCACHE +#include +#endif +#ifdef CONFIG_FRONTSWAP +#include +#endif + +#if 0 +/* this is more aggressive but may cause other problems? */ +#define ZCACHE_GFP_MASK (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN) +#else +#define ZCACHE_GFP_MASK \ + (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) +#endif + +/********** + * Compression buddies ("zbud") provides for packing two (or, possibly + * in the future, more) compressed ephemeral pages into a single "raw" + * (physical) page and tracking them with data structures so that + * the raw pages can be easily reclaimed. + * + * A zbud page ("zbpg") is an aligned page containing a list_head, + * a lock, and two "zbud headers". The remainder of the physical + * page is divided up into aligned 64-byte "chunks" which contain + * the compressed data for zero, one, or two zbuds. Each zbpg + * resides on: (1) an "unused list" if it has no zbuds; (2) a + * "buddied" list if it is fully populated with two zbuds; or + * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks + * the one unbuddied zbud uses. The data inside a zbpg cannot be + * read or written unless the zbpg's lock is held. + */ + +#define ZBH_SENTINEL 0x43214321 +#define ZBPG_SENTINEL 0xdeadbeef + +#define ZBUD_MAX_BUDS 2 + +struct zbud_hdr { + uint32_t pool_id; + struct tmem_oid oid; + uint32_t index; + uint16_t size; /* compressed size in bytes, zero means unused */ + DECL_SENTINEL +}; + +struct zbud_page { + struct list_head bud_list; + spinlock_t lock; + struct zbud_hdr buddy[ZBUD_MAX_BUDS]; + DECL_SENTINEL + /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */ +}; + +#define CHUNK_SHIFT 6 +#define CHUNK_SIZE (1 << CHUNK_SHIFT) +#define CHUNK_MASK (~(CHUNK_SIZE-1)) +#define NCHUNKS (((PAGE_SIZE - sizeof(struct zbud_page)) & \ + CHUNK_MASK) >> CHUNK_SHIFT) +#define MAX_CHUNK (NCHUNKS-1) + +static struct { + struct list_head list; + unsigned count; +} zbud_unbuddied[NCHUNKS]; +/* list N contains pages with N chunks USED and NCHUNKS-N unused */ +/* element 0 is never used but optimizing that isn't worth it */ +static unsigned long zbud_cumul_chunk_counts[NCHUNKS]; + +struct list_head zbud_buddied_list; +static unsigned long zcache_zbud_buddied_count; + +/* protects the buddied list and all unbuddied lists */ +static DEFINE_SPINLOCK(zbud_budlists_spinlock); + +static LIST_HEAD(zbpg_unused_list); +static unsigned long zcache_zbpg_unused_list_count; + +/* protects the unused page list */ +static DEFINE_SPINLOCK(zbpg_unused_list_spinlock); + +static atomic_t zcache_zbud_curr_raw_pages; +static atomic_t zcache_zbud_curr_zpages; +static unsigned long zcache_zbud_curr_zbytes; +static unsigned long zcache_zbud_cumul_zpages; +static unsigned long zcache_zbud_cumul_zbytes; +static unsigned long zcache_compress_poor; + +/* forward references */ +static void *zcache_get_free_page(void); +static void zcache_free_page(void *p); + +/* + * zbud helper functions + */ + +static inline unsigned zbud_max_buddy_size(void) +{ + return MAX_CHUNK << CHUNK_SHIFT; +} + +static inline unsigned zbud_size_to_chunks(unsigned size) +{ 
+ BUG_ON(size == 0 || size > zbud_max_buddy_size()); + return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; +} + +static inline int zbud_budnum(struct zbud_hdr *zh) +{ + unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1); + struct zbud_page *zbpg = NULL; + unsigned budnum = -1U; + int i; + + for (i = 0; i < ZBUD_MAX_BUDS; i++) + if (offset == offsetof(typeof(*zbpg), buddy[i])) { + budnum = i; + break; + } + BUG_ON(budnum == -1U); + return budnum; +} + +static char *zbud_data(struct zbud_hdr *zh, unsigned size) +{ + struct zbud_page *zbpg; + char *p; + unsigned budnum; + + ASSERT_SENTINEL(zh, ZBH); + budnum = zbud_budnum(zh); + BUG_ON(size == 0 || size > zbud_max_buddy_size()); + zbpg = container_of(zh, struct zbud_page, buddy[budnum]); + ASSERT_SPINLOCK(&zbpg->lock); + p = (char *)zbpg; + if (budnum == 0) + p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) & + CHUNK_MASK); + else if (budnum == 1) + p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); + return p; +} + +/* + * zbud raw page management + */ + +static struct zbud_page *zbud_alloc_raw_page(void) +{ + struct zbud_page *zbpg = NULL; + struct zbud_hdr *zh0, *zh1; + bool recycled = 0; + + /* if any pages on the zbpg list, use one */ + spin_lock(&zbpg_unused_list_spinlock); + if (!list_empty(&zbpg_unused_list)) { + zbpg = list_first_entry(&zbpg_unused_list, + struct zbud_page, bud_list); + list_del_init(&zbpg->bud_list); + zcache_zbpg_unused_list_count--; + recycled = 1; + } + spin_unlock(&zbpg_unused_list_spinlock); + if (zbpg == NULL) + /* none on zbpg list, try to get a kernel page */ + zbpg = zcache_get_free_page(); + if (likely(zbpg != NULL)) { + INIT_LIST_HEAD(&zbpg->bud_list); + zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; + spin_lock_init(&zbpg->lock); + if (recycled) { + ASSERT_INVERTED_SENTINEL(zbpg, ZBPG); + SET_SENTINEL(zbpg, ZBPG); + BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); + BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); + } else { + atomic_inc(&zcache_zbud_curr_raw_pages); + INIT_LIST_HEAD(&zbpg->bud_list); + SET_SENTINEL(zbpg, ZBPG); + zh0->size = 0; zh1->size = 0; + tmem_oid_set_invalid(&zh0->oid); + tmem_oid_set_invalid(&zh1->oid); + } + } + return zbpg; +} + +static void zbud_free_raw_page(struct zbud_page *zbpg) +{ + struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1]; + + ASSERT_SENTINEL(zbpg, ZBPG); + BUG_ON(!list_empty(&zbpg->bud_list)); + ASSERT_SPINLOCK(&zbpg->lock); + BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); + BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); + INVERT_SENTINEL(zbpg, ZBPG); + spin_unlock(&zbpg->lock); + spin_lock(&zbpg_unused_list_spinlock); + list_add(&zbpg->bud_list, &zbpg_unused_list); + zcache_zbpg_unused_list_count++; + spin_unlock(&zbpg_unused_list_spinlock); +} + +/* + * core zbud handling routines + */ + +static unsigned zbud_free(struct zbud_hdr *zh) +{ + unsigned size; + + ASSERT_SENTINEL(zh, ZBH); + BUG_ON(!tmem_oid_valid(&zh->oid)); + size = zh->size; + BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); + zh->size = 0; + tmem_oid_set_invalid(&zh->oid); + INVERT_SENTINEL(zh, ZBH); + zcache_zbud_curr_zbytes -= size; + atomic_dec(&zcache_zbud_curr_zpages); + return size; +} + +static void zbud_free_and_delist(struct zbud_hdr *zh) +{ + unsigned chunks; + struct zbud_hdr *zh_other; + unsigned budnum = zbud_budnum(zh), size; + struct zbud_page *zbpg = + container_of(zh, struct zbud_page, buddy[budnum]); + + spin_lock(&zbpg->lock); + if (list_empty(&zbpg->bud_list)) { + /* ignore zombie page... 
see zbud_evict_pages() */ + spin_unlock(&zbpg->lock); + return; + } + size = zbud_free(zh); + ASSERT_SPINLOCK(&zbpg->lock); + zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0]; + if (zh_other->size == 0) { /* was unbuddied: unlist and free */ + chunks = zbud_size_to_chunks(size) ; + spin_lock(&zbud_budlists_spinlock); + BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); + list_del_init(&zbpg->bud_list); + zbud_unbuddied[chunks].count--; + spin_unlock(&zbud_budlists_spinlock); + zbud_free_raw_page(zbpg); + } else { /* was buddied: move remaining buddy to unbuddied list */ + chunks = zbud_size_to_chunks(zh_other->size) ; + spin_lock(&zbud_budlists_spinlock); + list_del_init(&zbpg->bud_list); + zcache_zbud_buddied_count--; + list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); + zbud_unbuddied[chunks].count++; + spin_unlock(&zbud_budlists_spinlock); + spin_unlock(&zbpg->lock); + } +} + +static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid, + uint32_t index, struct page *page, + void *cdata, unsigned size) +{ + struct zbud_hdr *zh0, *zh1, *zh = NULL; + struct zbud_page *zbpg = NULL, *ztmp; + unsigned nchunks; + char *to; + int i, found_good_buddy = 0; + + nchunks = zbud_size_to_chunks(size) ; + for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) { + spin_lock(&zbud_budlists_spinlock); + if (!list_empty(&zbud_unbuddied[i].list)) { + list_for_each_entry_safe(zbpg, ztmp, + &zbud_unbuddied[i].list, bud_list) { + if (spin_trylock(&zbpg->lock)) { + found_good_buddy = i; + goto found_unbuddied; + } + } + } + spin_unlock(&zbud_budlists_spinlock); + } + /* didn't find a good buddy, try allocating a new page */ + zbpg = zbud_alloc_raw_page(); + if (unlikely(zbpg == NULL)) + goto out; + /* ok, have a page, now compress the data before taking locks */ + spin_lock(&zbpg->lock); + spin_lock(&zbud_budlists_spinlock); + list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list); + zbud_unbuddied[nchunks].count++; + zh = &zbpg->buddy[0]; + goto init_zh; + +found_unbuddied: + ASSERT_SPINLOCK(&zbpg->lock); + zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; + BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0))); + if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */ + ASSERT_SENTINEL(zh0, ZBH); + zh = zh1; + } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */ + ASSERT_SENTINEL(zh1, ZBH); + zh = zh0; + } else + BUG(); + list_del_init(&zbpg->bud_list); + zbud_unbuddied[found_good_buddy].count--; + list_add_tail(&zbpg->bud_list, &zbud_buddied_list); + zcache_zbud_buddied_count++; + +init_zh: + SET_SENTINEL(zh, ZBH); + zh->size = size; + zh->index = index; + zh->oid = *oid; + zh->pool_id = pool_id; + /* can wait to copy the data until the list locks are dropped */ + spin_unlock(&zbud_budlists_spinlock); + + to = zbud_data(zh, size); + memcpy(to, cdata, size); + spin_unlock(&zbpg->lock); + zbud_cumul_chunk_counts[nchunks]++; + atomic_inc(&zcache_zbud_curr_zpages); + zcache_zbud_cumul_zpages++; + zcache_zbud_curr_zbytes += size; + zcache_zbud_cumul_zbytes += size; +out: + return zh; +} + +static int zbud_decompress(struct page *page, struct zbud_hdr *zh) +{ + struct zbud_page *zbpg; + unsigned budnum = zbud_budnum(zh); + size_t out_len = PAGE_SIZE; + char *to_va, *from_va; + unsigned size; + int ret = 0; + + zbpg = container_of(zh, struct zbud_page, buddy[budnum]); + spin_lock(&zbpg->lock); + if (list_empty(&zbpg->bud_list)) { + /* ignore zombie page... 
see zbud_evict_pages() */ + ret = -EINVAL; + goto out; + } + ASSERT_SENTINEL(zh, ZBH); + BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); + to_va = kmap_atomic(page, KM_USER0); + size = zh->size; + from_va = zbud_data(zh, size); + ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len); + BUG_ON(ret != LZO_E_OK); + BUG_ON(out_len != PAGE_SIZE); + kunmap_atomic(to_va, KM_USER0); +out: + spin_unlock(&zbpg->lock); + return ret; +} + +/* + * The following routines handle shrinking of ephemeral pages by evicting + * pages "least valuable" first. + */ + +static unsigned long zcache_evicted_raw_pages; +static unsigned long zcache_evicted_buddied_pages; +static unsigned long zcache_evicted_unbuddied_pages; + +static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid); +static void zcache_put_pool(struct tmem_pool *pool); + +/* + * Flush and free all zbuds in a zbpg, then free the pageframe + */ +static void zbud_evict_zbpg(struct zbud_page *zbpg) +{ + struct zbud_hdr *zh; + int i, j; + uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS]; + struct tmem_oid oid[ZBUD_MAX_BUDS]; + struct tmem_pool *pool; + + ASSERT_SPINLOCK(&zbpg->lock); + BUG_ON(!list_empty(&zbpg->bud_list)); + for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) { + zh = &zbpg->buddy[i]; + if (zh->size) { + pool_id[j] = zh->pool_id; + oid[j] = zh->oid; + index[j] = zh->index; + j++; + zbud_free(zh); + } + } + spin_unlock(&zbpg->lock); + for (i = 0; i < j; i++) { + pool = zcache_get_pool_by_id(pool_id[i]); + if (pool != NULL) { + tmem_flush_page(pool, &oid[i], index[i]); + zcache_put_pool(pool); + } + } + ASSERT_SENTINEL(zbpg, ZBPG); + spin_lock(&zbpg->lock); + zbud_free_raw_page(zbpg); +} + +/* + * Free nr pages. This code is funky because we want to hold the locks + * protecting various lists for as short a time as possible, and in some + * circumstances the list may change asynchronously when the list lock is + * not held. In some cases we also trylock not only to avoid waiting on a + * page in use by another cpu, but also to avoid potential deadlock due to + * lock inversion. 
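+ * Concretely, zbud_free_and_delist() takes zbpg->lock before the budlists
+ * lock, so the eviction paths below, which already hold
+ * zbud_budlists_spinlock, only ever spin_trylock() a zbpg->lock and skip
+ * that page on contention rather than risk an inversion deadlock.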
+ */ +static void zbud_evict_pages(int nr) +{ + struct zbud_page *zbpg; + int i; + + /* first try freeing any pages on unused list */ +retry_unused_list: + spin_lock_bh(&zbpg_unused_list_spinlock); + if (!list_empty(&zbpg_unused_list)) { + /* can't walk list here, since it may change when unlocked */ + zbpg = list_first_entry(&zbpg_unused_list, + struct zbud_page, bud_list); + list_del_init(&zbpg->bud_list); + zcache_zbpg_unused_list_count--; + atomic_dec(&zcache_zbud_curr_raw_pages); + spin_unlock_bh(&zbpg_unused_list_spinlock); + zcache_free_page(zbpg); + zcache_evicted_raw_pages++; + if (--nr <= 0) + goto out; + goto retry_unused_list; + } + spin_unlock_bh(&zbpg_unused_list_spinlock); + + /* now try freeing unbuddied pages, starting with least space avail */ + for (i = 0; i < MAX_CHUNK; i++) { +retry_unbud_list_i: + spin_lock_bh(&zbud_budlists_spinlock); + if (list_empty(&zbud_unbuddied[i].list)) { + spin_unlock_bh(&zbud_budlists_spinlock); + continue; + } + list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) { + if (unlikely(!spin_trylock(&zbpg->lock))) + continue; + list_del_init(&zbpg->bud_list); + zbud_unbuddied[i].count--; + spin_unlock(&zbud_budlists_spinlock); + zcache_evicted_unbuddied_pages++; + /* want budlists unlocked when doing zbpg eviction */ + zbud_evict_zbpg(zbpg); + local_bh_enable(); + if (--nr <= 0) + goto out; + goto retry_unbud_list_i; + } + spin_unlock_bh(&zbud_budlists_spinlock); + } + + /* as a last resort, free buddied pages */ +retry_bud_list: + spin_lock_bh(&zbud_budlists_spinlock); + if (list_empty(&zbud_buddied_list)) { + spin_unlock_bh(&zbud_budlists_spinlock); + goto out; + } + list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) { + if (unlikely(!spin_trylock(&zbpg->lock))) + continue; + list_del_init(&zbpg->bud_list); + zcache_zbud_buddied_count--; + spin_unlock(&zbud_budlists_spinlock); + zcache_evicted_buddied_pages++; + /* want budlists unlocked when doing zbpg eviction */ + zbud_evict_zbpg(zbpg); + local_bh_enable(); + if (--nr <= 0) + goto out; + goto retry_bud_list; + } + spin_unlock_bh(&zbud_budlists_spinlock); +out: + return; +} + +static void zbud_init(void) +{ + int i; + + INIT_LIST_HEAD(&zbud_buddied_list); + zcache_zbud_buddied_count = 0; + for (i = 0; i < NCHUNKS; i++) { + INIT_LIST_HEAD(&zbud_unbuddied[i].list); + zbud_unbuddied[i].count = 0; + } +} + +#ifdef CONFIG_SYSFS +/* + * These sysfs routines show a nice distribution of how many zbpg's are + * currently (and have ever been placed) in each unbuddied list. It's fun + * to watch but can probably go away before final merge. 
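+ * (Reading the zbud_unbuddied_list_counts attribute yields one
+ * space-separated count per chunk-indexed list, e.g. "0 3 1 0 ..." --
+ * the values here are purely illustrative.)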
+ */ +static int zbud_show_unbuddied_list_counts(char *buf) +{ + int i; + char *p = buf; + + for (i = 0; i < NCHUNKS - 1; i++) + p += sprintf(p, "%u ", zbud_unbuddied[i].count); + p += sprintf(p, "%d\n", zbud_unbuddied[i].count); + return p - buf; +} + +static int zbud_show_cumul_chunk_counts(char *buf) +{ + unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0; + unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0; + unsigned long total_chunks_lte_42 = 0; + char *p = buf; + + for (i = 0; i < NCHUNKS; i++) { + p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]); + chunks += zbud_cumul_chunk_counts[i]; + total_chunks += zbud_cumul_chunk_counts[i]; + sum_total_chunks += i * zbud_cumul_chunk_counts[i]; + if (i == 21) + total_chunks_lte_21 = total_chunks; + if (i == 32) + total_chunks_lte_32 = total_chunks; + if (i == 42) + total_chunks_lte_42 = total_chunks; + } + p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n", + total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42, + chunks == 0 ? 0 : sum_total_chunks / chunks); + return p - buf; +} +#endif + +/********** + * This "zv" PAM implementation combines the TLSF-based xvMalloc + * with lzo1x compression to maximize the amount of data that can + * be packed into a physical page. + * + * Zv represents a PAM page with the index and object (plus a "size" value + * necessary for decompression) immediately preceding the compressed data. + */ + +#define ZVH_SENTINEL 0x43214321 + +struct zv_hdr { + uint32_t pool_id; + struct tmem_oid oid; + uint32_t index; + DECL_SENTINEL +}; + +static const int zv_max_page_size = (PAGE_SIZE / 8) * 7; + +static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id, + struct tmem_oid *oid, uint32_t index, + void *cdata, unsigned clen) +{ + struct page *page; + struct zv_hdr *zv = NULL; + uint32_t offset; + int ret; + + BUG_ON(!irqs_disabled()); + ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr), + &page, &offset, ZCACHE_GFP_MASK); + if (unlikely(ret)) + goto out; + zv = kmap_atomic(page, KM_USER0) + offset; + zv->index = index; + zv->oid = *oid; + zv->pool_id = pool_id; + SET_SENTINEL(zv, ZVH); + memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); + kunmap_atomic(zv, KM_USER0); +out: + return zv; +} + +static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv) +{ + unsigned long flags; + struct page *page; + uint32_t offset; + uint16_t size; + + ASSERT_SENTINEL(zv, ZVH); + size = xv_get_object_size(zv) - sizeof(*zv); + BUG_ON(size == 0 || size > zv_max_page_size); + INVERT_SENTINEL(zv, ZVH); + page = virt_to_page(zv); + offset = (unsigned long)zv & ~PAGE_MASK; + local_irq_save(flags); + xv_free(xvpool, page, offset); + local_irq_restore(flags); +} + +static void zv_decompress(struct page *page, struct zv_hdr *zv) +{ + size_t clen = PAGE_SIZE; + char *to_va; + unsigned size; + int ret; + + ASSERT_SENTINEL(zv, ZVH); + size = xv_get_object_size(zv) - sizeof(*zv); + BUG_ON(size == 0 || size > zv_max_page_size); + to_va = kmap_atomic(page, KM_USER0); + ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv), + size, to_va, &clen); + kunmap_atomic(to_va, KM_USER0); + BUG_ON(ret != LZO_E_OK); + BUG_ON(clen != PAGE_SIZE); +} + +/* + * zcache core code starts here + */ + +/* useful stats not collected by cleancache or frontswap */ +static unsigned long zcache_flush_total; +static unsigned long zcache_flush_found; +static unsigned long zcache_flobj_total; +static unsigned long zcache_flobj_found; +static unsigned long zcache_failed_eph_puts; +static unsigned long 
zcache_failed_pers_puts; + +#define MAX_POOLS_PER_CLIENT 16 + +static struct { + struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; + struct xv_pool *xvpool; +} zcache_client; + +/* + * Tmem operations assume the poolid implies the invoking client. + * Zcache only has one client (the kernel itself), so translate + * the poolid into the tmem_pool allocated for it. A KVM version + * of zcache would have one client per guest and each client might + * have a poolid==N. + */ +static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid) +{ + struct tmem_pool *pool = NULL; + + if (poolid >= 0) { + pool = zcache_client.tmem_pools[poolid]; + if (pool != NULL) + atomic_inc(&pool->refcount); + } + return pool; +} + +static void zcache_put_pool(struct tmem_pool *pool) +{ + if (pool != NULL) + atomic_dec(&pool->refcount); +} + +/* counters for debugging */ +static unsigned long zcache_failed_get_free_pages; +static unsigned long zcache_failed_alloc; +static unsigned long zcache_put_to_flush; +static unsigned long zcache_aborted_preload; +static unsigned long zcache_aborted_shrink; + +/* + * Ensure that memory allocation requests in zcache don't result + * in direct reclaim requests via the shrinker, which would cause + * an infinite loop. Maybe a GFP flag would be better? + */ +static DEFINE_SPINLOCK(zcache_direct_reclaim_lock); + +/* + * for now, used named slabs so can easily track usage; later can + * either just use kmalloc, or perhaps add a slab-like allocator + * to more carefully manage total memory utilization + */ +static struct kmem_cache *zcache_objnode_cache; +static struct kmem_cache *zcache_obj_cache; +static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0); +static unsigned long zcache_curr_obj_count_max; +static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0); +static unsigned long zcache_curr_objnode_count_max; + +/* + * to avoid memory allocation recursion (e.g. 
due to direct reclaim), we + * preload all necessary data structures so the hostops callbacks never + * actually do a malloc + */ +struct zcache_preload { + void *page; + struct tmem_obj *obj; + int nr; + struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH]; +}; +static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; + +static int zcache_do_preload(struct tmem_pool *pool) +{ + struct zcache_preload *kp; + struct tmem_objnode *objnode; + struct tmem_obj *obj; + void *page; + int ret = -ENOMEM; + + if (unlikely(zcache_objnode_cache == NULL)) + goto out; + if (unlikely(zcache_obj_cache == NULL)) + goto out; + if (!spin_trylock(&zcache_direct_reclaim_lock)) { + zcache_aborted_preload++; + goto out; + } + preempt_disable(); + kp = &__get_cpu_var(zcache_preloads); + while (kp->nr < ARRAY_SIZE(kp->objnodes)) { + preempt_enable_no_resched(); + objnode = kmem_cache_alloc(zcache_objnode_cache, + ZCACHE_GFP_MASK); + if (unlikely(objnode == NULL)) { + zcache_failed_alloc++; + goto unlock_out; + } + preempt_disable(); + kp = &__get_cpu_var(zcache_preloads); + if (kp->nr < ARRAY_SIZE(kp->objnodes)) + kp->objnodes[kp->nr++] = objnode; + else + kmem_cache_free(zcache_objnode_cache, objnode); + } + preempt_enable_no_resched(); + obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); + if (unlikely(obj == NULL)) { + zcache_failed_alloc++; + goto unlock_out; + } + page = (void *)__get_free_page(ZCACHE_GFP_MASK); + if (unlikely(page == NULL)) { + zcache_failed_get_free_pages++; + kmem_cache_free(zcache_obj_cache, obj); + goto unlock_out; + } + preempt_disable(); + kp = &__get_cpu_var(zcache_preloads); + if (kp->obj == NULL) + kp->obj = obj; + else + kmem_cache_free(zcache_obj_cache, obj); + if (kp->page == NULL) + kp->page = page; + else + free_page((unsigned long)page); + ret = 0; +unlock_out: + spin_unlock(&zcache_direct_reclaim_lock); +out: + return ret; +} + +static void *zcache_get_free_page(void) +{ + struct zcache_preload *kp; + void *page; + + kp = &__get_cpu_var(zcache_preloads); + page = kp->page; + BUG_ON(page == NULL); + kp->page = NULL; + return page; +} + +static void zcache_free_page(void *p) +{ + free_page((unsigned long)p); +} + +/* + * zcache implementation for tmem host ops + */ + +static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool) +{ + struct tmem_objnode *objnode = NULL; + unsigned long count; + struct zcache_preload *kp; + + kp = &__get_cpu_var(zcache_preloads); + if (kp->nr <= 0) + goto out; + objnode = kp->objnodes[kp->nr - 1]; + BUG_ON(objnode == NULL); + kp->objnodes[kp->nr - 1] = NULL; + kp->nr--; + count = atomic_inc_return(&zcache_curr_objnode_count); + if (count > zcache_curr_objnode_count_max) + zcache_curr_objnode_count_max = count; +out: + return objnode; +} + +static void zcache_objnode_free(struct tmem_objnode *objnode, + struct tmem_pool *pool) +{ + atomic_dec(&zcache_curr_objnode_count); + BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0); + kmem_cache_free(zcache_objnode_cache, objnode); +} + +static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool) +{ + struct tmem_obj *obj = NULL; + unsigned long count; + struct zcache_preload *kp; + + kp = &__get_cpu_var(zcache_preloads); + obj = kp->obj; + BUG_ON(obj == NULL); + kp->obj = NULL; + count = atomic_inc_return(&zcache_curr_obj_count); + if (count > zcache_curr_obj_count_max) + zcache_curr_obj_count_max = count; + return obj; +} + +static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool) +{ + atomic_dec(&zcache_curr_obj_count); + 
BUG_ON(atomic_read(&zcache_curr_obj_count) < 0); + kmem_cache_free(zcache_obj_cache, obj); +} + +static struct tmem_hostops zcache_hostops = { + .obj_alloc = zcache_obj_alloc, + .obj_free = zcache_obj_free, + .objnode_alloc = zcache_objnode_alloc, + .objnode_free = zcache_objnode_free, +}; + +/* + * zcache implementations for PAM page descriptor ops + */ + +static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0); +static unsigned long zcache_curr_eph_pampd_count_max; +static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0); +static unsigned long zcache_curr_pers_pampd_count_max; + +/* forward reference */ +static int zcache_compress(struct page *from, void **out_va, size_t *out_len); + +static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid, + uint32_t index, struct page *page) +{ + void *pampd = NULL, *cdata; + size_t clen; + int ret; + bool ephemeral = is_ephemeral(pool); + unsigned long count; + + if (ephemeral) { + ret = zcache_compress(page, &cdata, &clen); + if (ret == 0) + + goto out; + if (clen == 0 || clen > zbud_max_buddy_size()) { + zcache_compress_poor++; + goto out; + } + pampd = (void *)zbud_create(pool->pool_id, oid, index, + page, cdata, clen); + if (pampd != NULL) { + count = atomic_inc_return(&zcache_curr_eph_pampd_count); + if (count > zcache_curr_eph_pampd_count_max) + zcache_curr_eph_pampd_count_max = count; + } + } else { + /* + * FIXME: This is all the "policy" there is for now. + * 3/4 totpages should allow ~37% of RAM to be filled with + * compressed frontswap pages + */ + if (atomic_read(&zcache_curr_pers_pampd_count) > + 3 * totalram_pages / 4) + goto out; + ret = zcache_compress(page, &cdata, &clen); + if (ret == 0) + goto out; + if (clen > zv_max_page_size) { + zcache_compress_poor++; + goto out; + } + pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id, + oid, index, cdata, clen); + if (pampd == NULL) + goto out; + count = atomic_inc_return(&zcache_curr_pers_pampd_count); + if (count > zcache_curr_pers_pampd_count_max) + zcache_curr_pers_pampd_count_max = count; + } +out: + return pampd; +} + +/* + * fill the pageframe corresponding to the struct page with the data + * from the passed pampd + */ +static int zcache_pampd_get_data(struct page *page, void *pampd, + struct tmem_pool *pool) +{ + int ret = 0; + + if (is_ephemeral(pool)) + ret = zbud_decompress(page, pampd); + else + zv_decompress(page, pampd); + return ret; +} + +/* + * free the pampd and remove it from any zcache lists + * pampd must no longer be pointed to from any tmem data structures! 
+ */ +static void zcache_pampd_free(void *pampd, struct tmem_pool *pool) +{ + if (is_ephemeral(pool)) { + zbud_free_and_delist((struct zbud_hdr *)pampd); + atomic_dec(&zcache_curr_eph_pampd_count); + BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0); + } else { + zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd); + atomic_dec(&zcache_curr_pers_pampd_count); + BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0); + } +} + +static struct tmem_pamops zcache_pamops = { + .create = zcache_pampd_create, + .get_data = zcache_pampd_get_data, + .free = zcache_pampd_free, +}; + +/* + * zcache compression/decompression and related per-cpu stuff + */ + +#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS +#define LZO_DSTMEM_PAGE_ORDER 1 +static DEFINE_PER_CPU(unsigned char *, zcache_workmem); +static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); + +static int zcache_compress(struct page *from, void **out_va, size_t *out_len) +{ + int ret = 0; + unsigned char *dmem = __get_cpu_var(zcache_dstmem); + unsigned char *wmem = __get_cpu_var(zcache_workmem); + char *from_va; + + BUG_ON(!irqs_disabled()); + if (unlikely(dmem == NULL || wmem == NULL)) + goto out; /* no buffer, so can't compress */ + from_va = kmap_atomic(from, KM_USER0); + mb(); + ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem); + BUG_ON(ret != LZO_E_OK); + *out_va = dmem; + kunmap_atomic(from_va, KM_USER0); + ret = 1; +out: + return ret; +} + + +static int zcache_cpu_notifier(struct notifier_block *nb, + unsigned long action, void *pcpu) +{ + int cpu = (long)pcpu; + struct zcache_preload *kp; + + switch (action) { + case CPU_UP_PREPARE: + per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( + GFP_KERNEL | __GFP_REPEAT, + LZO_DSTMEM_PAGE_ORDER), + per_cpu(zcache_workmem, cpu) = + kzalloc(LZO1X_MEM_COMPRESS, + GFP_KERNEL | __GFP_REPEAT); + break; + case CPU_DEAD: + case CPU_UP_CANCELED: + free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), + LZO_DSTMEM_PAGE_ORDER); + per_cpu(zcache_dstmem, cpu) = NULL; + kfree(per_cpu(zcache_workmem, cpu)); + per_cpu(zcache_workmem, cpu) = NULL; + kp = &per_cpu(zcache_preloads, cpu); + while (kp->nr) { + kmem_cache_free(zcache_objnode_cache, + kp->objnodes[kp->nr - 1]); + kp->objnodes[kp->nr - 1] = NULL; + kp->nr--; + } + kmem_cache_free(zcache_obj_cache, kp->obj); + free_page((unsigned long)kp->page); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block zcache_cpu_notifier_block = { + .notifier_call = zcache_cpu_notifier +}; + +#ifdef CONFIG_SYSFS +#define ZCACHE_SYSFS_RO(_name) \ + static ssize_t zcache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%lu\n", zcache_##_name); \ + } \ + static struct kobj_attribute zcache_##_name##_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = zcache_##_name##_show, \ + } + +#define ZCACHE_SYSFS_RO_ATOMIC(_name) \ + static ssize_t zcache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \ + } \ + static struct kobj_attribute zcache_##_name##_attr = { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = zcache_##_name##_show, \ + } + +#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \ + static ssize_t zcache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + return _func(buf); \ + } \ + static struct kobj_attribute zcache_##_name##_attr = { \ + .attr = { .name = 
__stringify(_name), .mode = 0444 }, \ + .show = zcache_##_name##_show, \ + } + +ZCACHE_SYSFS_RO(curr_obj_count_max); +ZCACHE_SYSFS_RO(curr_objnode_count_max); +ZCACHE_SYSFS_RO(flush_total); +ZCACHE_SYSFS_RO(flush_found); +ZCACHE_SYSFS_RO(flobj_total); +ZCACHE_SYSFS_RO(flobj_found); +ZCACHE_SYSFS_RO(failed_eph_puts); +ZCACHE_SYSFS_RO(failed_pers_puts); +ZCACHE_SYSFS_RO(zbud_curr_zbytes); +ZCACHE_SYSFS_RO(zbud_cumul_zpages); +ZCACHE_SYSFS_RO(zbud_cumul_zbytes); +ZCACHE_SYSFS_RO(zbud_buddied_count); +ZCACHE_SYSFS_RO(zbpg_unused_list_count); +ZCACHE_SYSFS_RO(evicted_raw_pages); +ZCACHE_SYSFS_RO(evicted_unbuddied_pages); +ZCACHE_SYSFS_RO(evicted_buddied_pages); +ZCACHE_SYSFS_RO(failed_get_free_pages); +ZCACHE_SYSFS_RO(failed_alloc); +ZCACHE_SYSFS_RO(put_to_flush); +ZCACHE_SYSFS_RO(aborted_preload); +ZCACHE_SYSFS_RO(aborted_shrink); +ZCACHE_SYSFS_RO(compress_poor); +ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages); +ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages); +ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count); +ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count); +ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts, + zbud_show_unbuddied_list_counts); +ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts, + zbud_show_cumul_chunk_counts); + +static struct attribute *zcache_attrs[] = { + &zcache_curr_obj_count_attr.attr, + &zcache_curr_obj_count_max_attr.attr, + &zcache_curr_objnode_count_attr.attr, + &zcache_curr_objnode_count_max_attr.attr, + &zcache_flush_total_attr.attr, + &zcache_flobj_total_attr.attr, + &zcache_flush_found_attr.attr, + &zcache_flobj_found_attr.attr, + &zcache_failed_eph_puts_attr.attr, + &zcache_failed_pers_puts_attr.attr, + &zcache_compress_poor_attr.attr, + &zcache_zbud_curr_raw_pages_attr.attr, + &zcache_zbud_curr_zpages_attr.attr, + &zcache_zbud_curr_zbytes_attr.attr, + &zcache_zbud_cumul_zpages_attr.attr, + &zcache_zbud_cumul_zbytes_attr.attr, + &zcache_zbud_buddied_count_attr.attr, + &zcache_zbpg_unused_list_count_attr.attr, + &zcache_evicted_raw_pages_attr.attr, + &zcache_evicted_unbuddied_pages_attr.attr, + &zcache_evicted_buddied_pages_attr.attr, + &zcache_failed_get_free_pages_attr.attr, + &zcache_failed_alloc_attr.attr, + &zcache_put_to_flush_attr.attr, + &zcache_aborted_preload_attr.attr, + &zcache_aborted_shrink_attr.attr, + &zcache_zbud_unbuddied_list_counts_attr.attr, + &zcache_zbud_cumul_chunk_counts_attr.attr, + NULL, +}; + +static struct attribute_group zcache_attr_group = { + .attrs = zcache_attrs, + .name = "zcache", +}; + +#endif /* CONFIG_SYSFS */ +/* + * When zcache is disabled ("frozen"), pools can be created and destroyed, + * but all puts (and thus all other operations that require memory allocation) + * must fail. If zcache is unfrozen, accepts puts, then frozen again, + * data consistency requires all puts while frozen to be converted into + * flushes. + */ +static bool zcache_freeze; + +/* + * zcache shrinker interface (only useful for ephemeral pages, so zbud only) + */ +static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) +{ + int ret = -1; + + if (nr >= 0) { + if (!(gfp_mask & __GFP_FS)) + /* does this case really need to be skipped? 
*/ + goto out; + if (spin_trylock(&zcache_direct_reclaim_lock)) { + zbud_evict_pages(nr); + spin_unlock(&zcache_direct_reclaim_lock); + } else + zcache_aborted_shrink++; + } + ret = (int)atomic_read(&zcache_zbud_curr_raw_pages); +out: + return ret; +} + +static struct shrinker zcache_shrinker = { + .shrink = shrink_zcache_memory, + .seeks = DEFAULT_SEEKS, +}; + +/* + * zcache shims between cleancache/frontswap ops and tmem + */ + +static int zcache_put_page(int pool_id, struct tmem_oid *oidp, + uint32_t index, struct page *page) +{ + struct tmem_pool *pool; + int ret = -1; + + BUG_ON(!irqs_disabled()); + pool = zcache_get_pool_by_id(pool_id); + if (unlikely(pool == NULL)) + goto out; + if (!zcache_freeze && zcache_do_preload(pool) == 0) { + /* preload does preempt_disable on success */ + ret = tmem_put(pool, oidp, index, page); + if (ret < 0) { + if (is_ephemeral(pool)) + zcache_failed_eph_puts++; + else + zcache_failed_pers_puts++; + } + zcache_put_pool(pool); + preempt_enable_no_resched(); + } else { + zcache_put_to_flush++; + if (atomic_read(&pool->obj_count) > 0) + /* the put fails whether the flush succeeds or not */ + (void)tmem_flush_page(pool, oidp, index); + zcache_put_pool(pool); + } +out: + return ret; +} + +static int zcache_get_page(int pool_id, struct tmem_oid *oidp, + uint32_t index, struct page *page) +{ + struct tmem_pool *pool; + int ret = -1; + unsigned long flags; + + local_irq_save(flags); + pool = zcache_get_pool_by_id(pool_id); + if (likely(pool != NULL)) { + if (atomic_read(&pool->obj_count) > 0) + ret = tmem_get(pool, oidp, index, page); + zcache_put_pool(pool); + } + local_irq_restore(flags); + return ret; +} + +static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index) +{ + struct tmem_pool *pool; + int ret = -1; + unsigned long flags; + + local_irq_save(flags); + zcache_flush_total++; + pool = zcache_get_pool_by_id(pool_id); + if (likely(pool != NULL)) { + if (atomic_read(&pool->obj_count) > 0) + ret = tmem_flush_page(pool, oidp, index); + zcache_put_pool(pool); + } + if (ret >= 0) + zcache_flush_found++; + local_irq_restore(flags); + return ret; +} + +static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) +{ + struct tmem_pool *pool; + int ret = -1; + unsigned long flags; + + local_irq_save(flags); + zcache_flobj_total++; + pool = zcache_get_pool_by_id(pool_id); + if (likely(pool != NULL)) { + if (atomic_read(&pool->obj_count) > 0) + ret = tmem_flush_object(pool, oidp); + zcache_put_pool(pool); + } + if (ret >= 0) + zcache_flobj_found++; + local_irq_restore(flags); + return ret; +} + +static int zcache_destroy_pool(int pool_id) +{ + struct tmem_pool *pool = NULL; + int ret = -1; + + if (pool_id < 0) + goto out; + pool = zcache_client.tmem_pools[pool_id]; + if (pool == NULL) + goto out; + zcache_client.tmem_pools[pool_id] = NULL; + /* wait for pool activity on other cpus to quiesce */ + while (atomic_read(&pool->refcount) != 0) + ; + local_bh_disable(); + ret = tmem_destroy_pool(pool); + local_bh_enable(); + kfree(pool); + pr_info("zcache: destroyed pool id=%d\n", pool_id); +out: + return ret; +} + +static int zcache_new_pool(uint32_t flags) +{ + int poolid = -1; + struct tmem_pool *pool; + + pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); + if (pool == NULL) { + pr_info("zcache: pool creation failed: out of memory\n"); + goto out; + } + + for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) + if (zcache_client.tmem_pools[poolid] == NULL) + break; + if (poolid >= MAX_POOLS_PER_CLIENT) { + pr_info("zcache: pool 
creation failed: max exceeded\n"); + kfree(pool); + poolid = -1; + goto out; + } + atomic_set(&pool->refcount, 0); + pool->client = &zcache_client; + pool->pool_id = poolid; + tmem_new_pool(pool, flags); + zcache_client.tmem_pools[poolid] = pool; + pr_info("zcache: created %s tmem pool, id=%d\n", + flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", + poolid); +out: + return poolid; +} + +/********** + * Two kernel functionalities currently can be layered on top of tmem. + * These are "cleancache" which is used as a second-chance cache for clean + * page cache pages; and "frontswap" which is used for swap pages + * to avoid writes to disk. A generic "shim" is provided here for each + * to translate in-kernel semantics to zcache semantics. + */ + +#ifdef CONFIG_CLEANCACHE +static void zcache_cleancache_put_page(int pool_id, + struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (likely(ind == index)) + (void)zcache_put_page(pool_id, &oid, index, page); +} + +static int zcache_cleancache_get_page(int pool_id, + struct cleancache_filekey key, + pgoff_t index, struct page *page) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + int ret = -1; + + if (likely(ind == index)) + ret = zcache_get_page(pool_id, &oid, index, page); + return ret; +} + +static void zcache_cleancache_flush_page(int pool_id, + struct cleancache_filekey key, + pgoff_t index) +{ + u32 ind = (u32) index; + struct tmem_oid oid = *(struct tmem_oid *)&key; + + if (likely(ind == index)) + (void)zcache_flush_page(pool_id, &oid, ind); +} + +static void zcache_cleancache_flush_inode(int pool_id, + struct cleancache_filekey key) +{ + struct tmem_oid oid = *(struct tmem_oid *)&key; + + (void)zcache_flush_object(pool_id, &oid); +} + +static void zcache_cleancache_flush_fs(int pool_id) +{ + if (pool_id >= 0) + (void)zcache_destroy_pool(pool_id); +} + +static int zcache_cleancache_init_fs(size_t pagesize) +{ + BUG_ON(sizeof(struct cleancache_filekey) != + sizeof(struct tmem_oid)); + BUG_ON(pagesize != PAGE_SIZE); + return zcache_new_pool(0); +} + +static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) +{ + /* shared pools are unsupported and map to private */ + BUG_ON(sizeof(struct cleancache_filekey) != + sizeof(struct tmem_oid)); + BUG_ON(pagesize != PAGE_SIZE); + return zcache_new_pool(0); +} + +static struct cleancache_ops zcache_cleancache_ops = { + .put_page = zcache_cleancache_put_page, + .get_page = zcache_cleancache_get_page, + .flush_page = zcache_cleancache_flush_page, + .flush_inode = zcache_cleancache_flush_inode, + .flush_fs = zcache_cleancache_flush_fs, + .init_shared_fs = zcache_cleancache_init_shared_fs, + .init_fs = zcache_cleancache_init_fs +}; + +struct cleancache_ops zcache_cleancache_register_ops(void) +{ + struct cleancache_ops old_ops = + cleancache_register_ops(&zcache_cleancache_ops); + + return old_ops; +} +#endif + +#ifdef CONFIG_FRONTSWAP +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int zcache_frontswap_poolid = -1; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. 
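+ * Worked example: with SWIZ_BITS == 4, swap offset 0x123 of type 2 becomes
+ * oid.oid[0] == (2 << 4) | 0x3 and index == 0x12, so consecutive offsets of
+ * one swap type fan out across 16 distinct tmem objects.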
Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) +{ + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} + +static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + struct tmem_oid oid = oswiz(type, ind); + int ret = -1; + unsigned long flags; + + BUG_ON(!PageLocked(page)); + if (likely(ind64 == ind)) { + local_irq_save(flags); + ret = zcache_put_page(zcache_frontswap_poolid, &oid, + iswiz(ind), page); + local_irq_restore(flags); + } + return ret; +} + +/* returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) */ +static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + struct tmem_oid oid = oswiz(type, ind); + int ret = -1; + + BUG_ON(!PageLocked(page)); + if (likely(ind64 == ind)) + ret = zcache_get_page(zcache_frontswap_poolid, &oid, + iswiz(ind), page); + return ret; +} + +/* flush a single page from frontswap */ +static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + struct tmem_oid oid = oswiz(type, ind); + + if (likely(ind64 == ind)) + (void)zcache_flush_page(zcache_frontswap_poolid, &oid, + iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void zcache_frontswap_flush_area(unsigned type) +{ + struct tmem_oid oid; + int ind; + + for (ind = SWIZ_MASK; ind >= 0; ind--) { + oid = oswiz(type, ind); + (void)zcache_flush_object(zcache_frontswap_poolid, &oid); + } +} + +static void zcache_frontswap_init(unsigned ignored) +{ + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (zcache_frontswap_poolid < 0) + zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST); +} + +static struct frontswap_ops zcache_frontswap_ops = { + .put_page = zcache_frontswap_put_page, + .get_page = zcache_frontswap_get_page, + .flush_page = zcache_frontswap_flush_page, + .flush_area = zcache_frontswap_flush_area, + .init = zcache_frontswap_init +}; + +struct frontswap_ops zcache_frontswap_register_ops(void) +{ + struct frontswap_ops old_ops = + frontswap_register_ops(&zcache_frontswap_ops); + + return old_ops; +} +#endif + +/* + * zcache initialization + * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR + * NOTHING HAPPENS! 
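+ * For example, booting with "zcache" on the kernel command line enables it;
+ * "nocleancache" and/or "nofrontswap" can additionally be passed to disable
+ * either layer independently (see the __setup() hooks below).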
+ */ + +static int zcache_enabled; + +static int __init enable_zcache(char *s) +{ + zcache_enabled = 1; + return 1; +} +__setup("zcache", enable_zcache); + +/* allow independent dynamic disabling of cleancache and frontswap */ + +static int use_cleancache = 1; + +static int __init no_cleancache(char *s) +{ + use_cleancache = 0; + return 1; +} + +__setup("nocleancache", no_cleancache); + +static int use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} + +__setup("nofrontswap", no_frontswap); + +static int __init zcache_init(void) +{ +#ifdef CONFIG_SYSFS + int ret = 0; + + ret = sysfs_create_group(mm_kobj, &zcache_attr_group); + if (ret) { + pr_err("zcache: can't create sysfs\n"); + goto out; + } +#endif /* CONFIG_SYSFS */ +#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP) + if (zcache_enabled) { + unsigned int cpu; + + tmem_register_hostops(&zcache_hostops); + tmem_register_pamops(&zcache_pamops); + ret = register_cpu_notifier(&zcache_cpu_notifier_block); + if (ret) { + pr_err("zcache: can't register cpu notifier\n"); + goto out; + } + for_each_online_cpu(cpu) { + void *pcpu = (void *)(long)cpu; + zcache_cpu_notifier(&zcache_cpu_notifier_block, + CPU_UP_PREPARE, pcpu); + } + } + zcache_objnode_cache = kmem_cache_create("zcache_objnode", + sizeof(struct tmem_objnode), 0, 0, NULL); + zcache_obj_cache = kmem_cache_create("zcache_obj", + sizeof(struct tmem_obj), 0, 0, NULL); +#endif +#ifdef CONFIG_CLEANCACHE + if (zcache_enabled && use_cleancache) { + struct cleancache_ops old_ops; + + zbud_init(); + register_shrinker(&zcache_shrinker); + old_ops = zcache_cleancache_register_ops(); + pr_info("zcache: cleancache enabled using kernel " + "transcendent memory and compression buddies\n"); + if (old_ops.init_fs != NULL) + pr_warning("zcache: cleancache_ops overridden"); + } +#endif +#ifdef CONFIG_FRONTSWAP + if (zcache_enabled && use_frontswap) { + struct frontswap_ops old_ops; + + zcache_client.xvpool = xv_create_pool(); + if (zcache_client.xvpool == NULL) { + pr_err("zcache: can't create xvpool\n"); + goto out; + } + old_ops = zcache_frontswap_register_ops(); + pr_info("zcache: frontswap enabled using kernel " + "transcendent memory and xvmalloc\n"); + if (old_ops.init != NULL) + pr_warning("ktmem: frontswap_ops overridden"); + } +#endif +out: + return ret; +} + +module_init(zcache_init) From 1abd4f495eaa84e73b597f238cce704f06c54dc4 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:51:37 +0800 Subject: [PATCH 105/155] fs: add field to superblock to support cleancache --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9b678052..925a431e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1383,6 +1383,11 @@ struct super_block { * generic_show_options() */ char *s_options; + + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; }; extern struct timespec current_fs_time(struct super_block *sb); From 8b62d33820d84cc081745802f9a62425151047f4 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:52:57 +0800 Subject: [PATCH 106/155] enable zcache & cleancache --- arch/arm/mach-msm/board-htcleo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 8d633974..f20da79f 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -40,7 +40,7 @@ #define MSM_FB_SIZE 0x00600000 #define 
MSM_PMEM_MDP_BASE 0x3B700000 -#define MSM_PMEM_MDP_SIZE 0x02000000 +#define MSM_PMEM_MDP_SIZE 0x03000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 #define MSM_PMEM_ADSP_SIZE 0x01800000 @@ -59,7 +59,7 @@ /* Begin EBI region */ #define PMEM_KERNEL_EBI1_SIZE 0x00028000 -#define MSM_PMEM_SF_SIZE 0x02000000 +#define MSM_PMEM_SF_SIZE 0x03000000 /* MSM_RAM_CONSOLE uses the last 0x00040000 of EBI memory, defined in msm_iomap.h #define MSM_RAM_CONSOLE_SIZE 0x00040000 From c2ff7098d4011efb6143305f62b71c0f95e38108 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:53:53 +0800 Subject: [PATCH 107/155] enable zcache & cleancache --- arch/arm/configs/htcleo_defconfig | 6 ++++++ 1 file changed, 6 insertions(+) mode change 100644 => 100755 arch/arm/configs/htcleo_defconfig diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig old mode 100644 new mode 100755 index 12f408f2..a26de0ce --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -406,6 +406,7 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ALIGNMENT_TRAP=y CONFIG_ALLOW_CPU_ALIGNMENT=y # CONFIG_UACCESS_WITH_MEMCPY is not set +CONFIG_CLEANCACHE=y # # Boot options @@ -1688,6 +1689,11 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_IIO is not set # CONFIG_BTPORT is not set +# +# ZCACHE +# +CONFIG_ZCACHE=y + # # ZRAM # From b6c1977f351a6095b33a667546ec0496cbe8ca20 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 00:57:38 +0800 Subject: [PATCH 108/155] revert #8b62d33 --- arch/arm/mach-msm/board-htcleo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index f20da79f..8d633974 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -40,7 +40,7 @@ #define MSM_FB_SIZE 0x00600000 #define MSM_PMEM_MDP_BASE 0x3B700000 -#define MSM_PMEM_MDP_SIZE 0x03000000 +#define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 #define MSM_PMEM_ADSP_SIZE 0x01800000 @@ -59,7 +59,7 @@ /* Begin EBI region */ #define PMEM_KERNEL_EBI1_SIZE 0x00028000 -#define MSM_PMEM_SF_SIZE 0x03000000 +#define MSM_PMEM_SF_SIZE 0x02000000 /* MSM_RAM_CONSOLE uses the last 0x00040000 of EBI memory, defined in msm_iomap.h #define MSM_RAM_CONSOLE_SIZE 0x00040000 From 3d343ac32afc6eb933d98eb2dc4b3ad532de7f3a Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 12:10:10 +0800 Subject: [PATCH 109/155] mm/fs: add hooks to support cleancache --- fs/buffer.c | 5 +++++ fs/mpage.c | 7 +++++++ fs/super.c | 3 +++ mm/filemap.c | 11 +++++++++++ mm/truncate.c | 8 +++++++- 5 files changed, 33 insertions(+), 1 deletion(-) mode change 100644 => 100755 fs/buffer.c mode change 100644 => 100755 fs/mpage.c mode change 100644 => 100755 fs/super.c mode change 100644 => 100755 mm/filemap.c mode change 100644 => 100755 mm/truncate.c diff --git a/fs/buffer.c b/fs/buffer.c old mode 100644 new mode 100755 index 6fa53025..abf188f3 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -276,6 +277,10 @@ void invalidate_bdev(struct block_device *bdev) invalidate_bh_lrus(); invalidate_mapping_pages(mapping, 0, -1); + /* 99% of the time, we don't need to flush the cleancache on the bdev. 
+ * But, for the strange corners, lets be cautious + */ + cleancache_flush_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/mpage.c b/fs/mpage.c old mode 100644 new mode 100755 index 42381bd6..b5677aba --- a/fs/mpage.c +++ b/fs/mpage.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * I/O completion handler for multipage BIOs. @@ -284,6 +285,12 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } else if (fully_mapped) { SetPageMappedToDisk(page); } + + if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && + cleancache_get_page(page) == 0) { + SetPageUptodate(page); + goto confused; + } /* * This page will go to BIO. Do we need to send this BIO off first? diff --git a/fs/super.c b/fs/super.c old mode 100644 new mode 100755 index aff046b0..a0ba74eb --- a/fs/super.c +++ b/fs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "internal.h" @@ -104,6 +105,7 @@ static struct super_block *alloc_super(struct file_system_type *type) s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; } out: return s; @@ -219,6 +221,7 @@ void deactivate_locked_super(struct super_block *s) s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); vfs_dq_off(s, 0); + cleancache_flush_fs(s); fs->kill_sb(s); put_filesystem(fs); put_super(s); diff --git a/mm/filemap.c b/mm/filemap.c old mode 100644 new mode 100755 index 8e96c907..a4399ff2 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -34,6 +34,7 @@ #include /* for BUG_ON(!in_atomic()) only */ #include #include /* for page_is_file_cache() */ +#include #include "internal.h" /* @@ -119,6 +120,16 @@ void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + /* + * if we're uptodate, flush out into the cleancache, otherwise + * invalidate any existing cleancache entries. We can't leave + * stale data around in the cleancache once our page is gone + */ + if (PageUptodate(page) && PageMappedToDisk(page)) + cleancache_put_page(page); + else + cleancache_flush_page(mapping, page); + radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; diff --git a/mm/truncate.c b/mm/truncate.c old mode 100644 new mode 100755 index 258bda7e..31c639ea --- a/mm/truncate.c +++ b/mm/truncate.c @@ -18,6 +18,7 @@ #include #include /* grr. 
try_to_release_page, do_invalidatepage */ +#include #include "internal.h" @@ -50,6 +51,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) static inline void truncate_partial_page(struct page *page, unsigned partial) { zero_user_segment(page, partial, PAGE_CACHE_SIZE); + cleancache_flush_page(page->mapping, page); if (page_has_private(page)) do_invalidatepage(page, partial); } @@ -213,7 +215,8 @@ void truncate_inode_pages_range(struct address_space *mapping, struct pagevec pvec; pgoff_t next; int i; - + + cleancache_flush_inode(mapping); if (mapping->nrpages == 0) return; @@ -287,6 +290,7 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_release(&pvec); } + cleancache_flush_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -423,6 +427,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; int wrapped = 0; + cleancache_flush_inode(mapping); pagevec_init(&pvec, 0); next = start; while (next <= end && !wrapped && @@ -479,6 +484,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pagevec_release(&pvec); cond_resched(); } + cleancache_flush_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); From 7c50bd921f08bf56b04e171115dd7e3f52896f51 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 15:24:37 +0800 Subject: [PATCH 110/155] staging: lowmemorykiller: add fudgeswap fudgeswap acts as follows: If set to non-zero (default is 512k): check the amount of SWAP_FREE space available. If > 0KB is available: if fudgeswap > swapfree: other_file += swapfree else: other_file += fudgeswap In short: we add in fudgeswap as long as it is less than the free swap. Setting this to a very large positive number indicates swap should be treated as fully free (and will slow the system down); smaller numbers let you put some pressure on swap without slowing the system down as much. Small negative numbers allow the system to be faster at the same minfree level. The default is 512, to apply a little pressure towards using some swap; it can be modified at runtime via /sys/module/lowmemorykiller/parameters/fudgeswap originally by ezterry --- drivers/staging/android/lowmemorykiller.c | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 05ebece0..5930a813 100755 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -41,6 +41,11 @@ #include #include +#ifdef CONFIG_SWAP +#include +#include +#endif + static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { 0, @@ -64,6 +69,10 @@ static struct task_struct *lowmem_deathpending; static unsigned long lowmem_deathpending_timeout; static struct kobject *lowmem_kobj; +#ifdef CONFIG_SWAP +static int fudgeswap = 512; +#endif + #define lowmem_print(level, x...)
\ do { \ if (lowmem_debug_level >= (level)) \ @@ -122,7 +131,19 @@ static inline void get_free_ram(int *other_free, int *other_file) *other_free = global_page_state(NR_FREE_PAGES); *other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); +#ifdef CONFIG_SWAP + if(fudgeswap != 0){ + struct sysinfo si; + si_swapinfo(&si); + if(si.freeswap > 0){ + if(fudgeswap > si.freeswap) + other_file += si.freeswap; + else + other_file += fudgeswap; + } + } +#endif if (offlining) { /* Discount all free space in the section being offlined */ for_each_zone(zone) { @@ -347,6 +368,9 @@ module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); module_param_named(notify_trigger, lowmem_minfree_notif_trigger, uint, S_IRUGO | S_IWUSR); +#ifdef CONFIG_SWAP +module_param_named(fudgeswap, fudgeswap, int, S_IRUGO | S_IWUSR); +#endif module_init(lowmem_init); module_exit(lowmem_exit); From f3a9b636b38756c67fc9a3edf2db3088dfd7fd3c Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 23:10:46 +0800 Subject: [PATCH 111/155] Revert "mm/fs: add hooks to support cleancache" This reverts commit 3d343ac32afc6eb933d98eb2dc4b3ad532de7f3a. --- fs/buffer.c | 5 ----- fs/mpage.c | 7 ------- fs/super.c | 3 --- mm/filemap.c | 11 ----------- mm/truncate.c | 8 +------- 5 files changed, 1 insertion(+), 33 deletions(-) mode change 100755 => 100644 fs/buffer.c mode change 100755 => 100644 fs/mpage.c mode change 100755 => 100644 fs/super.c mode change 100755 => 100644 mm/filemap.c mode change 100755 => 100644 mm/truncate.c diff --git a/fs/buffer.c b/fs/buffer.c old mode 100755 new mode 100644 index abf188f3..6fa53025 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,7 +41,6 @@ #include #include #include -#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -277,10 +276,6 @@ void invalidate_bdev(struct block_device *bdev) invalidate_bh_lrus(); invalidate_mapping_pages(mapping, 0, -1); - /* 99% of the time, we don't need to flush the cleancache on the bdev. - * But, for the strange corners, lets be cautious - */ - cleancache_flush_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/mpage.c b/fs/mpage.c old mode 100755 new mode 100644 index b5677aba..42381bd6 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -26,7 +26,6 @@ #include #include #include -#include /* * I/O completion handler for multipage BIOs. @@ -285,12 +284,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } else if (fully_mapped) { SetPageMappedToDisk(page); } - - if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && - cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; - } /* * This page will go to BIO. Do we need to send this BIO off first? 
diff --git a/fs/super.c b/fs/super.c old mode 100755 new mode 100644 index a0ba74eb..aff046b0 --- a/fs/super.c +++ b/fs/super.c @@ -38,7 +38,6 @@ #include #include #include -#include #include "internal.h" @@ -105,7 +104,6 @@ static struct super_block *alloc_super(struct file_system_type *type) s->s_qcop = sb_quotactl_ops; s->s_op = &default_op; s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; } out: return s; @@ -221,7 +219,6 @@ void deactivate_locked_super(struct super_block *s) s->s_count -= S_BIAS-1; spin_unlock(&sb_lock); vfs_dq_off(s, 0); - cleancache_flush_fs(s); fs->kill_sb(s); put_filesystem(fs); put_super(s); diff --git a/mm/filemap.c b/mm/filemap.c old mode 100755 new mode 100644 index a4399ff2..8e96c907 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -34,7 +34,6 @@ #include /* for BUG_ON(!in_atomic()) only */ #include #include /* for page_is_file_cache() */ -#include #include "internal.h" /* @@ -120,16 +119,6 @@ void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - /* - * if we're uptodate, flush out into the cleancache, otherwise - * invalidate any existing cleancache entries. We can't leave - * stale data around in the cleancache once our page is gone - */ - if (PageUptodate(page) && PageMappedToDisk(page)) - cleancache_put_page(page); - else - cleancache_flush_page(mapping, page); - radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; diff --git a/mm/truncate.c b/mm/truncate.c old mode 100755 new mode 100644 index 31c639ea..258bda7e --- a/mm/truncate.c +++ b/mm/truncate.c @@ -18,7 +18,6 @@ #include #include /* grr. try_to_release_page, do_invalidatepage */ -#include #include "internal.h" @@ -51,7 +50,6 @@ void do_invalidatepage(struct page *page, unsigned long offset) static inline void truncate_partial_page(struct page *page, unsigned partial) { zero_user_segment(page, partial, PAGE_CACHE_SIZE); - cleancache_flush_page(page->mapping, page); if (page_has_private(page)) do_invalidatepage(page, partial); } @@ -215,8 +213,7 @@ void truncate_inode_pages_range(struct address_space *mapping, struct pagevec pvec; pgoff_t next; int i; - - cleancache_flush_inode(mapping); + if (mapping->nrpages == 0) return; @@ -290,7 +287,6 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_release(&pvec); } - cleancache_flush_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -427,7 +423,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; int wrapped = 0; - cleancache_flush_inode(mapping); pagevec_init(&pvec, 0); next = start; while (next <= end && !wrapped && @@ -484,7 +479,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pagevec_release(&pvec); cond_resched(); } - cleancache_flush_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); From 0f794ead76c1db9f60edb03bdd2632c15b936401 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 23:12:10 +0800 Subject: [PATCH 112/155] Revert "enable zcache & cleancache" This reverts commit c2ff7098d4011efb6143305f62b71c0f95e38108. 
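This backs out the htcleo_defconfig change that enabled CONFIG_CLEANCACHE and CONFIG_ZCACHE.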
--- arch/arm/configs/htcleo_defconfig | 6 ------ 1 file changed, 6 deletions(-) mode change 100755 => 100644 arch/arm/configs/htcleo_defconfig diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig old mode 100755 new mode 100644 index a26de0ce..12f408f2 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -406,7 +406,6 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ALIGNMENT_TRAP=y CONFIG_ALLOW_CPU_ALIGNMENT=y # CONFIG_UACCESS_WITH_MEMCPY is not set -CONFIG_CLEANCACHE=y # # Boot options @@ -1689,11 +1688,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_IIO is not set # CONFIG_BTPORT is not set -# -# ZCACHE -# -CONFIG_ZCACHE=y - # # ZRAM # From 4cecd4ccb2ef21842d6ab0fb62a15d3b1a1801aa Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 23:12:22 +0800 Subject: [PATCH 113/155] Revert "fs: add field to superblock to support cleancache" This reverts commit 1abd4f495eaa84e73b597f238cce704f06c54dc4. --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 925a431e..9b678052 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1383,11 +1383,6 @@ struct super_block { * generic_show_options() */ char *s_options; - - /* - * Saved pool identifier for cleancache (-1 means none) - */ - int cleancache_poolid; }; extern struct timespec current_fs_time(struct super_block *sb); From 61eb7c5296af5ca7b86f8d8075f3dd248b3534f0 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 23:14:51 +0800 Subject: [PATCH 114/155] Revert "add zcache" This reverts commit 8eb6724dbfb99bb1f17f3192483fafc1f9eb73fe. --- drivers/staging/Kconfig | 1 - drivers/staging/Makefile | 1 - drivers/staging/zcache/Kconfig | 13 - drivers/staging/zcache/Makefile | 3 - drivers/staging/zcache/tmem.c | 710 ------------- drivers/staging/zcache/tmem.h | 195 ---- drivers/staging/zcache/zcache.c | 1658 ------------------------------- 7 files changed, 2581 deletions(-) delete mode 100755 drivers/staging/zcache/Kconfig delete mode 100755 drivers/staging/zcache/Makefile delete mode 100755 drivers/staging/zcache/tmem.c delete mode 100755 drivers/staging/zcache/tmem.h delete mode 100755 drivers/staging/zcache/zcache.c diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index e4c3c9dd..8ee4bfa6 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,6 +125,5 @@ source "drivers/staging/iio/Kconfig" source "drivers/staging/zram/Kconfig" -source "drivers/staging/zcache/Kconfig" endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 5f0f554b..5a1b7341 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -45,5 +45,4 @@ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_XVMALLOC) += zram/ -obj-$(CONFIG_ZCACHE) += zcache/ diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig deleted file mode 100755 index 7fabcb2b..00000000 --- a/drivers/staging/zcache/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -config ZCACHE - tristate "Dynamic compression of swap pages and clean pagecache pages" - depends on CLEANCACHE || FRONTSWAP - select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS - default n - help - Zcache doubles RAM efficiency while providing a significant - performance boosts on many workloads. 
Zcache uses lzo1x - compression and an in-kernel implementation of transcendent - memory to store clean page cache pages and swap in RAM, - providing a noticeable reduction in disk I/O. diff --git a/drivers/staging/zcache/Makefile b/drivers/staging/zcache/Makefile deleted file mode 100755 index f5ec64f9..00000000 --- a/drivers/staging/zcache/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -zcache-y := tmem.o - -obj-$(CONFIG_ZCACHE) += zcache.o diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c deleted file mode 100755 index e954d405..00000000 --- a/drivers/staging/zcache/tmem.c +++ /dev/null @@ -1,710 +0,0 @@ -/* - * In-kernel transcendent memory (generic implementation) - * - * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. - * - * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented - * "handles" (triples containing a pool id, and object id, and an index), to - * pages in a page-accessible memory (PAM). Tmem references the PAM pages via - * an abstract "pampd" (PAM page-descriptor), which can be operated on by a - * set of functions (pamops). Each pampd contains some representation of - * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of - * pages and must be able to insert, find, and delete these pages at a - * potential frequency of thousands per second concurrently across many CPUs, - * (and, if used with KVM, across many vcpus across many guests). - * Tmem is tracked with a hierarchy of data structures, organized by - * the elements in a handle-tuple: pool_id, object_id, and page index. - * One or more "clients" (e.g. guests) each provide one or more tmem_pools. - * Each pool, contains a hash table of rb_trees of tmem_objs. Each - * tmem_obj contains a radix-tree-like tree of pointers, with intermediate - * nodes called tmem_objnodes. Each leaf pointer in this tree points to - * a pampd, which is accessible only through a small set of callbacks - * registered by the PAM implementation (see tmem_register_pamops). Tmem - * does all memory allocation via a set of callbacks registered by the tmem - * host implementation (e.g. see tmem_register_hostops). - */ - -#include -#include -#include - -#include "tmem.h" - -/* data structure sentinels used for debugging... see tmem.h */ -#define POOL_SENTINEL 0x87658765 -#define OBJ_SENTINEL 0x12345678 -#define OBJNODE_SENTINEL 0xfedcba09 - -/* - * A tmem host implementation must use this function to register callbacks - * for memory allocation. - */ -static struct tmem_hostops tmem_hostops; - -static void tmem_objnode_tree_init(void); - -void tmem_register_hostops(struct tmem_hostops *m) -{ - tmem_objnode_tree_init(); - tmem_hostops = *m; -} - -/* - * A tmem host implementation must use this function to register - * callbacks for a page-accessible memory (PAM) implementation - */ -static struct tmem_pamops tmem_pamops; - -void tmem_register_pamops(struct tmem_pamops *m) -{ - tmem_pamops = *m; -} - -/* - * Oid's are potentially very sparse and tmem_objs may have an indeterminately - * short life, being added and deleted at a relatively high frequency. - * So an rb_tree is an ideal data structure to manage tmem_objs. But because - * of the potentially huge number of tmem_objs, each pool manages a hashtable - * of rb_trees to reduce search, insert, delete, and rebalancing time. - * Each hashbucket also has a lock to manage concurrent access. - * - * The following routines manage tmem_objs. When any tmem_obj is accessed, - * the hashbucket lock must be held. 
- */ - -/* searches for object==oid in pool, returns locked object if found */ -static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb, - struct tmem_oid *oidp) -{ - struct rb_node *rbnode; - struct tmem_obj *obj; - - rbnode = hb->obj_rb_root.rb_node; - while (rbnode) { - BUG_ON(RB_EMPTY_NODE(rbnode)); - obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); - switch (tmem_oid_compare(oidp, &obj->oid)) { - case 0: /* equal */ - goto out; - case -1: - rbnode = rbnode->rb_left; - break; - case 1: - rbnode = rbnode->rb_right; - break; - } - } - obj = NULL; -out: - return obj; -} - -static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *); - -/* free an object that has no more pampds in it */ -static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb) -{ - struct tmem_pool *pool; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pampd_count > 0); - pool = obj->pool; - BUG_ON(pool == NULL); - if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */ - tmem_pampd_destroy_all_in_obj(obj); - BUG_ON(obj->objnode_tree_root != NULL); - BUG_ON((long)obj->objnode_count != 0); - atomic_dec(&pool->obj_count); - BUG_ON(atomic_read(&pool->obj_count) < 0); - INVERT_SENTINEL(obj, OBJ); - obj->pool = NULL; - tmem_oid_set_invalid(&obj->oid); - rb_erase(&obj->rb_tree_node, &hb->obj_rb_root); -} - -/* - * initialize, and insert an tmem_object_root (called only if find failed) - */ -static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, - struct tmem_pool *pool, - struct tmem_oid *oidp) -{ - struct rb_root *root = &hb->obj_rb_root; - struct rb_node **new = &(root->rb_node), *parent = NULL; - struct tmem_obj *this; - - BUG_ON(pool == NULL); - atomic_inc(&pool->obj_count); - obj->objnode_tree_height = 0; - obj->objnode_tree_root = NULL; - obj->pool = pool; - obj->oid = *oidp; - obj->objnode_count = 0; - obj->pampd_count = 0; - SET_SENTINEL(obj, OBJ); - while (*new) { - BUG_ON(RB_EMPTY_NODE(*new)); - this = rb_entry(*new, struct tmem_obj, rb_tree_node); - parent = *new; - switch (tmem_oid_compare(oidp, &this->oid)) { - case 0: - BUG(); /* already present; should never happen! */ - break; - case -1: - new = &(*new)->rb_left; - break; - case 1: - new = &(*new)->rb_right; - break; - } - } - rb_link_node(&obj->rb_tree_node, parent, new); - rb_insert_color(&obj->rb_tree_node, root); -} - -/* - * Tmem is managed as a set of tmem_pools with certain attributes, such as - * "ephemeral" vs "persistent". These attributes apply to all tmem_objs - * and all pampds that belong to a tmem_pool. A tmem_pool is created - * or deleted relatively rarely (for example, when a filesystem is - * mounted or unmounted. - */ - -/* flush all data from a pool and, optionally, free it */ -static void tmem_pool_flush(struct tmem_pool *pool, bool destroy) -{ - struct rb_node *rbnode; - struct tmem_obj *obj; - struct tmem_hashbucket *hb = &pool->hashbucket[0]; - int i; - - BUG_ON(pool == NULL); - for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { - spin_lock(&hb->lock); - rbnode = rb_first(&hb->obj_rb_root); - while (rbnode != NULL) { - obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node); - rbnode = rb_next(rbnode); - tmem_pampd_destroy_all_in_obj(obj); - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - } - spin_unlock(&hb->lock); - } - if (destroy) - list_del(&pool->pool_list); -} - -/* - * A tmem_obj contains a radix-tree-like tree in which the intermediate - * nodes are called tmem_objnodes. 
(The kernel lib/radix-tree.c implementation - * is very specialized and tuned for specific uses and is not particularly - * suited for use from this code, though some code from the core algorithms has - * been reused, thus the copyright notices below). Each tmem_objnode contains - * a set of pointers which point to either a set of intermediate tmem_objnodes - * or a set of of pampds. - * - * Portions Copyright (C) 2001 Momchil Velikov - * Portions Copyright (C) 2001 Christoph Hellwig - * Portions Copyright (C) 2005 SGI, Christoph Lameter - */ - -struct tmem_objnode_tree_path { - struct tmem_objnode *objnode; - int offset; -}; - -/* objnode height_to_maxindex translation */ -static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1]; - -static void tmem_objnode_tree_init(void) -{ - unsigned int ht, tmp; - - for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) { - tmp = ht * OBJNODE_TREE_MAP_SHIFT; - if (tmp >= OBJNODE_TREE_INDEX_BITS) - tmem_objnode_tree_h2max[ht] = ~0UL; - else - tmem_objnode_tree_h2max[ht] = - (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1; - } -} - -static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj) -{ - struct tmem_objnode *objnode; - - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - objnode = (*tmem_hostops.objnode_alloc)(obj->pool); - if (unlikely(objnode == NULL)) - goto out; - objnode->obj = obj; - SET_SENTINEL(objnode, OBJNODE); - memset(&objnode->slots, 0, sizeof(objnode->slots)); - objnode->slots_in_use = 0; - obj->objnode_count++; -out: - return objnode; -} - -static void tmem_objnode_free(struct tmem_objnode *objnode) -{ - struct tmem_pool *pool; - int i; - - BUG_ON(objnode == NULL); - for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) - BUG_ON(objnode->slots[i] != NULL); - ASSERT_SENTINEL(objnode, OBJNODE); - INVERT_SENTINEL(objnode, OBJNODE); - BUG_ON(objnode->obj == NULL); - ASSERT_SENTINEL(objnode->obj, OBJ); - pool = objnode->obj->pool; - BUG_ON(pool == NULL); - ASSERT_SENTINEL(pool, POOL); - objnode->obj->objnode_count--; - objnode->obj = NULL; - (*tmem_hostops.objnode_free)(objnode, pool); -} - -/* - * lookup index in object and return associated pampd (or NULL if not found) - */ -static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) -{ - unsigned int height, shift; - struct tmem_objnode **slot = NULL; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - - height = obj->objnode_tree_height; - if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) - goto out; - if (height == 0 && obj->objnode_tree_root) { - slot = &obj->objnode_tree_root; - goto out; - } - shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; - slot = &obj->objnode_tree_root; - while (height > 0) { - if (*slot == NULL) - goto out; - slot = (struct tmem_objnode **) - ((*slot)->slots + - ((index >> shift) & OBJNODE_TREE_MAP_MASK)); - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } -out: - return slot != NULL ? 
*slot : NULL; -} - -static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, - void *pampd) -{ - int ret = 0; - struct tmem_objnode *objnode = NULL, *newnode, *slot; - unsigned int height, shift; - int offset = 0; - - /* if necessary, extend the tree to be higher */ - if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) { - height = obj->objnode_tree_height + 1; - if (index > tmem_objnode_tree_h2max[height]) - while (index > tmem_objnode_tree_h2max[height]) - height++; - if (obj->objnode_tree_root == NULL) { - obj->objnode_tree_height = height; - goto insert; - } - do { - newnode = tmem_objnode_alloc(obj); - if (!newnode) { - ret = -ENOMEM; - goto out; - } - newnode->slots[0] = obj->objnode_tree_root; - newnode->slots_in_use = 1; - obj->objnode_tree_root = newnode; - obj->objnode_tree_height++; - } while (height > obj->objnode_tree_height); - } -insert: - slot = obj->objnode_tree_root; - height = obj->objnode_tree_height; - shift = (height-1) * OBJNODE_TREE_MAP_SHIFT; - while (height > 0) { - if (slot == NULL) { - /* add a child objnode. */ - slot = tmem_objnode_alloc(obj); - if (!slot) { - ret = -ENOMEM; - goto out; - } - if (objnode) { - - objnode->slots[offset] = slot; - objnode->slots_in_use++; - } else - obj->objnode_tree_root = slot; - } - /* go down a level */ - offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; - objnode = slot; - slot = objnode->slots[offset]; - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } - BUG_ON(slot != NULL); - if (objnode) { - objnode->slots_in_use++; - objnode->slots[offset] = pampd; - } else - obj->objnode_tree_root = pampd; - obj->pampd_count++; -out: - return ret; -} - -static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index) -{ - struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1]; - struct tmem_objnode_tree_path *pathp = path; - struct tmem_objnode *slot = NULL; - unsigned int height, shift; - int offset; - - BUG_ON(obj == NULL); - ASSERT_SENTINEL(obj, OBJ); - BUG_ON(obj->pool == NULL); - ASSERT_SENTINEL(obj->pool, POOL); - height = obj->objnode_tree_height; - if (index > tmem_objnode_tree_h2max[height]) - goto out; - slot = obj->objnode_tree_root; - if (height == 0 && obj->objnode_tree_root) { - obj->objnode_tree_root = NULL; - goto out; - } - shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT; - pathp->objnode = NULL; - do { - if (slot == NULL) - goto out; - pathp++; - offset = (index >> shift) & OBJNODE_TREE_MAP_MASK; - pathp->offset = offset; - pathp->objnode = slot; - slot = slot->slots[offset]; - shift -= OBJNODE_TREE_MAP_SHIFT; - height--; - } while (height > 0); - if (slot == NULL) - goto out; - while (pathp->objnode) { - pathp->objnode->slots[pathp->offset] = NULL; - pathp->objnode->slots_in_use--; - if (pathp->objnode->slots_in_use) { - if (pathp->objnode == obj->objnode_tree_root) { - while (obj->objnode_tree_height > 0 && - obj->objnode_tree_root->slots_in_use == 1 && - obj->objnode_tree_root->slots[0]) { - struct tmem_objnode *to_free = - obj->objnode_tree_root; - - obj->objnode_tree_root = - to_free->slots[0]; - obj->objnode_tree_height--; - to_free->slots[0] = NULL; - to_free->slots_in_use = 0; - tmem_objnode_free(to_free); - } - } - goto out; - } - tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */ - pathp--; - } - obj->objnode_tree_height = 0; - obj->objnode_tree_root = NULL; - -out: - if (slot != NULL) - obj->pampd_count--; - BUG_ON(obj->pampd_count < 0); - return slot; -} - -/* recursively walk the objnode_tree destroying pampds and objnodes */ -static void 
tmem_objnode_node_destroy(struct tmem_obj *obj, - struct tmem_objnode *objnode, - unsigned int ht) -{ - int i; - - if (ht == 0) - return; - for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) { - if (objnode->slots[i]) { - if (ht == 1) { - obj->pampd_count--; - (*tmem_pamops.free)(objnode->slots[i], - obj->pool); - objnode->slots[i] = NULL; - continue; - } - tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1); - tmem_objnode_free(objnode->slots[i]); - objnode->slots[i] = NULL; - } - } -} - -static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) -{ - if (obj->objnode_tree_root == NULL) - return; - if (obj->objnode_tree_height == 0) { - obj->pampd_count--; - (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool); - } else { - tmem_objnode_node_destroy(obj, obj->objnode_tree_root, - obj->objnode_tree_height); - tmem_objnode_free(obj->objnode_tree_root); - obj->objnode_tree_height = 0; - } - obj->objnode_tree_root = NULL; -} - -/* - * Tmem is operated on by a set of well-defined actions: - * "put", "get", "flush", "flush_object", "new pool" and "destroy pool". - * (The tmem ABI allows for subpages and exchanges but these operations - * are not included in this implementation.) - * - * These "tmem core" operations are implemented in the following functions. - */ - -/* - * "Put" a page, e.g. copy a page from the kernel into newly allocated - * PAM space (if such space is available). Tmem_put is complicated by - * a corner case: What if a page with matching handle already exists in - * tmem? To guarantee coherency, one of two actions is necessary: Either - * the data for the page must be overwritten, or the page must be - * "flushed" so that the data is not accessible to a subsequent "get". - * Since these "duplicate puts" are relatively rare, this implementation - * always flushes for simplicity. - */ -int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, - struct page *page) -{ - struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL; - void *pampd = NULL, *pampd_del = NULL; - int ret = -ENOMEM; - bool ephemeral; - struct tmem_hashbucket *hb; - - ephemeral = is_ephemeral(pool); - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = objfound = tmem_obj_find(hb, oidp); - if (obj != NULL) { - pampd = tmem_pampd_lookup_in_obj(objfound, index); - if (pampd != NULL) { - /* if found, is a dup put, flush the old one */ - pampd_del = tmem_pampd_delete_from_obj(obj, index); - BUG_ON(pampd_del != pampd); - (*tmem_pamops.free)(pampd, pool); - if (obj->pampd_count == 0) { - objnew = obj; - objfound = NULL; - } - pampd = NULL; - } - } else { - obj = objnew = (*tmem_hostops.obj_alloc)(pool); - if (unlikely(obj == NULL)) { - ret = -ENOMEM; - goto out; - } - tmem_obj_init(obj, hb, pool, oidp); - } - BUG_ON(obj == NULL); - BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound)); - pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page); - if (unlikely(pampd == NULL)) - goto free; - ret = tmem_pampd_add_to_obj(obj, index, pampd); - if (unlikely(ret == -ENOMEM)) - /* may have partially built objnode tree ("stump") */ - goto delete_and_free; - goto out; - -delete_and_free: - (void)tmem_pampd_delete_from_obj(obj, index); -free: - if (pampd) - (*tmem_pamops.free)(pampd, pool); - if (objnew) { - tmem_obj_free(objnew, hb); - (*tmem_hostops.obj_free)(objnew, pool); - } -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * "Get" a page, e.g. 
if one can be found, copy the tmem page with the - * matching handle from PAM space to the kernel. By tmem definition, - * when a "get" is successful on an ephemeral page, the page is "flushed", - * and when a "get" is successful on a persistent page, the page is retained - * in tmem. Note that to preserve - * coherency, "get" can never be skipped if tmem contains the data. - * That is, if a get is done with a certain handle and fails, any - * subsequent "get" must also fail (unless of course there is a - * "put" done with the same handle). - - */ -int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, - uint32_t index, struct page *page) -{ - struct tmem_obj *obj; - void *pampd; - bool ephemeral = is_ephemeral(pool); - uint32_t ret = -1; - struct tmem_hashbucket *hb; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - ephemeral = is_ephemeral(pool); - if (ephemeral) - pampd = tmem_pampd_delete_from_obj(obj, index); - else - pampd = tmem_pampd_lookup_in_obj(obj, index); - if (pampd == NULL) - goto out; - ret = (*tmem_pamops.get_data)(page, pampd, pool); - if (ret < 0) - goto out; - if (ephemeral) { - (*tmem_pamops.free)(pampd, pool); - if (obj->pampd_count == 0) { - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - obj = NULL; - } - } - ret = 0; -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * If a page in tmem matches the handle, "flush" this page from tmem such - * that any subsequent "get" does not succeed (unless, of course, there - * was another "put" with the same handle). - */ -int tmem_flush_page(struct tmem_pool *pool, - struct tmem_oid *oidp, uint32_t index) -{ - struct tmem_obj *obj; - void *pampd; - int ret = -1; - struct tmem_hashbucket *hb; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - pampd = tmem_pampd_delete_from_obj(obj, index); - if (pampd == NULL) - goto out; - (*tmem_pamops.free)(pampd, pool); - if (obj->pampd_count == 0) { - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - } - ret = 0; - -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * "Flush" all pages in tmem matching this oid. - */ -int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp) -{ - struct tmem_obj *obj; - struct tmem_hashbucket *hb; - int ret = -1; - - hb = &pool->hashbucket[tmem_oid_hash(oidp)]; - spin_lock(&hb->lock); - obj = tmem_obj_find(hb, oidp); - if (obj == NULL) - goto out; - tmem_pampd_destroy_all_in_obj(obj); - tmem_obj_free(obj, hb); - (*tmem_hostops.obj_free)(obj, pool); - ret = 0; - -out: - spin_unlock(&hb->lock); - return ret; -} - -/* - * "Flush" all pages (and tmem_objs) from this tmem_pool and disable - * all subsequent access to this tmem_pool. - */ -int tmem_destroy_pool(struct tmem_pool *pool) -{ - int ret = -1; - - if (pool == NULL) - goto out; - tmem_pool_flush(pool, 1); - ret = 0; -out: - return ret; -} - -static LIST_HEAD(tmem_global_pool_list); - -/* - * Create a new tmem_pool with the provided flag and return - * a pool id provided by the tmem host implementation. 
- */ -void tmem_new_pool(struct tmem_pool *pool, uint32_t flags) -{ - int persistent = flags & TMEM_POOL_PERSIST; - int shared = flags & TMEM_POOL_SHARED; - struct tmem_hashbucket *hb = &pool->hashbucket[0]; - int i; - - for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) { - hb->obj_rb_root = RB_ROOT; - spin_lock_init(&hb->lock); - } - INIT_LIST_HEAD(&pool->pool_list); - atomic_set(&pool->obj_count, 0); - SET_SENTINEL(pool, POOL); - list_add_tail(&pool->pool_list, &tmem_global_pool_list); - pool->persistent = persistent; - pool->shared = shared; -} diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h deleted file mode 100755 index 2e07e217..00000000 --- a/drivers/staging/zcache/tmem.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * tmem.h - * - * Transcendent memory - * - * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. - */ - -#ifndef _TMEM_H_ -#define _TMEM_H_ - -#include -#include -#include -#include - -/* - * These are pre-defined by the Xen<->Linux ABI - */ -#define TMEM_PUT_PAGE 4 -#define TMEM_GET_PAGE 5 -#define TMEM_FLUSH_PAGE 6 -#define TMEM_FLUSH_OBJECT 7 -#define TMEM_POOL_PERSIST 1 -#define TMEM_POOL_SHARED 2 -#define TMEM_POOL_PRECOMPRESSED 4 -#define TMEM_POOL_PAGESIZE_SHIFT 4 -#define TMEM_POOL_PAGESIZE_MASK 0xf -#define TMEM_POOL_RESERVED_BITS 0x00ffff00 - -/* - * sentinels have proven very useful for debugging but can be removed - * or disabled before final merge. - */ -#define SENTINELS -#ifdef SENTINELS -#define DECL_SENTINEL uint32_t sentinel; -#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL) -#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL) -#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL) -#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL) -#else -#define DECL_SENTINEL -#define SET_SENTINEL(_x, _y) do { } while (0) -#define INVERT_SENTINEL(_x, _y) do { } while (0) -#define ASSERT_SENTINEL(_x, _y) do { } while (0) -#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0) -#endif - -#define ASSERT_SPINLOCK(_l) WARN_ON(!spin_is_locked(_l)) - -/* - * A pool is the highest-level data structure managed by tmem and - * usually corresponds to a large independent set of pages such as - * a filesystem. Each pool has an id, and certain attributes and counters. - * It also contains a set of hash buckets, each of which contains an rbtree - * of objects and a lock to manage concurrency within the pool. - */ - -#define TMEM_HASH_BUCKET_BITS 8 -#define TMEM_HASH_BUCKETS (1<persistent) -#define is_ephemeral(_p) (!(_p->persistent)) - -/* - * An object id ("oid") is large: 192-bits (to ensure, for example, files - * in a modern filesystem can be uniquely identified). 
- */ - -struct tmem_oid { - uint64_t oid[3]; -}; - -static inline void tmem_oid_set_invalid(struct tmem_oid *oidp) -{ - oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; -} - -static inline bool tmem_oid_valid(struct tmem_oid *oidp) -{ - return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL || - oidp->oid[2] != -1UL; -} - -static inline int tmem_oid_compare(struct tmem_oid *left, - struct tmem_oid *right) -{ - int ret; - - if (left->oid[2] == right->oid[2]) { - if (left->oid[1] == right->oid[1]) { - if (left->oid[0] == right->oid[0]) - ret = 0; - else if (left->oid[0] < right->oid[0]) - ret = -1; - else - return 1; - } else if (left->oid[1] < right->oid[1]) - ret = -1; - else - ret = 1; - } else if (left->oid[2] < right->oid[2]) - ret = -1; - else - ret = 1; - return ret; -} - -static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) -{ - return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], - TMEM_HASH_BUCKET_BITS); -} - -/* - * A tmem_obj contains an identifier (oid), pointers to the parent - * pool and the rb_tree to which it belongs, counters, and an ordered - * set of pampds, structured in a radix-tree-like tree. The intermediate - * nodes of the tree are called tmem_objnodes. - */ - -struct tmem_objnode; - -struct tmem_obj { - struct tmem_oid oid; - struct tmem_pool *pool; - struct rb_node rb_tree_node; - struct tmem_objnode *objnode_tree_root; - unsigned int objnode_tree_height; - unsigned long objnode_count; - long pampd_count; - DECL_SENTINEL -}; - -#define OBJNODE_TREE_MAP_SHIFT 6 -#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT) -#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1) -#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define OBJNODE_TREE_MAX_PATH \ - (OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2) - -struct tmem_objnode { - struct tmem_obj *obj; - DECL_SENTINEL - void *slots[OBJNODE_TREE_MAP_SIZE]; - unsigned int slots_in_use; -}; - -/* pampd abstract datatype methods provided by the PAM implementation */ -struct tmem_pamops { - void *(*create)(struct tmem_pool *, struct tmem_oid *, uint32_t, - struct page *); - int (*get_data)(struct page *, void *, struct tmem_pool *); - void (*free)(void *, struct tmem_pool *); -}; -extern void tmem_register_pamops(struct tmem_pamops *m); - -/* memory allocation methods provided by the host implementation */ -struct tmem_hostops { - struct tmem_obj *(*obj_alloc)(struct tmem_pool *); - void (*obj_free)(struct tmem_obj *, struct tmem_pool *); - struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *); - void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *); -}; -extern void tmem_register_hostops(struct tmem_hostops *m); - -/* core tmem accessor functions */ -extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index, - struct page *page); -extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index, - struct page *page); -extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, - uint32_t index); -extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); -extern int tmem_destroy_pool(struct tmem_pool *); -extern void tmem_new_pool(struct tmem_pool *, uint32_t); -#endif /* _TMEM_H */ diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c deleted file mode 100755 index b8a2b30a..00000000 --- a/drivers/staging/zcache/zcache.c +++ /dev/null @@ -1,1658 +0,0 @@ -/* - * zcache.c - * - * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp. 
- * Copyright (c) 2010,2011, Nitin Gupta - * - * Zcache provides an in-kernel "host implementation" for transcendent memory - * and, thus indirectly, for cleancache and frontswap. Zcache includes two - * page-accessible memory [1] interfaces, both utilizing lzo1x compression: - * 1) "compression buddies" ("zbud") is used for ephemeral pages - * 2) xvmalloc is used for persistent pages. - * Xvmalloc (based on the TLSF allocator) has very low fragmentation - * so maximizes space efficiency, while zbud allows pairs (and potentially, - * in the future, more than a pair of) compressed pages to be closely linked - * so that reclaiming can be done via the kernel's physical-page-oriented - * "shrinker" interface. - * - * [1] For a definition of page-accessible memory (aka PAM), see: - * http://marc.info/?l=linux-mm&m=127811271605009 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "tmem.h" - -#include "../zram/xvmalloc.h" /* if built in drivers/staging */ - -#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP)) -#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP" -#endif -#ifdef CONFIG_CLEANCACHE -#include -#endif -#ifdef CONFIG_FRONTSWAP -#include -#endif - -#if 0 -/* this is more aggressive but may cause other problems? */ -#define ZCACHE_GFP_MASK (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN) -#else -#define ZCACHE_GFP_MASK \ - (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) -#endif - -/********** - * Compression buddies ("zbud") provides for packing two (or, possibly - * in the future, more) compressed ephemeral pages into a single "raw" - * (physical) page and tracking them with data structures so that - * the raw pages can be easily reclaimed. - * - * A zbud page ("zbpg") is an aligned page containing a list_head, - * a lock, and two "zbud headers". The remainder of the physical - * page is divided up into aligned 64-byte "chunks" which contain - * the compressed data for zero, one, or two zbuds. Each zbpg - * resides on: (1) an "unused list" if it has no zbuds; (2) a - * "buddied" list if it is fully populated with two zbuds; or - * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks - * the one unbuddied zbud uses. The data inside a zbpg cannot be - * read or written unless the zbpg's lock is held. 
- */ - -#define ZBH_SENTINEL 0x43214321 -#define ZBPG_SENTINEL 0xdeadbeef - -#define ZBUD_MAX_BUDS 2 - -struct zbud_hdr { - uint32_t pool_id; - struct tmem_oid oid; - uint32_t index; - uint16_t size; /* compressed size in bytes, zero means unused */ - DECL_SENTINEL -}; - -struct zbud_page { - struct list_head bud_list; - spinlock_t lock; - struct zbud_hdr buddy[ZBUD_MAX_BUDS]; - DECL_SENTINEL - /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */ -}; - -#define CHUNK_SHIFT 6 -#define CHUNK_SIZE (1 << CHUNK_SHIFT) -#define CHUNK_MASK (~(CHUNK_SIZE-1)) -#define NCHUNKS (((PAGE_SIZE - sizeof(struct zbud_page)) & \ - CHUNK_MASK) >> CHUNK_SHIFT) -#define MAX_CHUNK (NCHUNKS-1) - -static struct { - struct list_head list; - unsigned count; -} zbud_unbuddied[NCHUNKS]; -/* list N contains pages with N chunks USED and NCHUNKS-N unused */ -/* element 0 is never used but optimizing that isn't worth it */ -static unsigned long zbud_cumul_chunk_counts[NCHUNKS]; - -struct list_head zbud_buddied_list; -static unsigned long zcache_zbud_buddied_count; - -/* protects the buddied list and all unbuddied lists */ -static DEFINE_SPINLOCK(zbud_budlists_spinlock); - -static LIST_HEAD(zbpg_unused_list); -static unsigned long zcache_zbpg_unused_list_count; - -/* protects the unused page list */ -static DEFINE_SPINLOCK(zbpg_unused_list_spinlock); - -static atomic_t zcache_zbud_curr_raw_pages; -static atomic_t zcache_zbud_curr_zpages; -static unsigned long zcache_zbud_curr_zbytes; -static unsigned long zcache_zbud_cumul_zpages; -static unsigned long zcache_zbud_cumul_zbytes; -static unsigned long zcache_compress_poor; - -/* forward references */ -static void *zcache_get_free_page(void); -static void zcache_free_page(void *p); - -/* - * zbud helper functions - */ - -static inline unsigned zbud_max_buddy_size(void) -{ - return MAX_CHUNK << CHUNK_SHIFT; -} - -static inline unsigned zbud_size_to_chunks(unsigned size) -{ - BUG_ON(size == 0 || size > zbud_max_buddy_size()); - return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; -} - -static inline int zbud_budnum(struct zbud_hdr *zh) -{ - unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1); - struct zbud_page *zbpg = NULL; - unsigned budnum = -1U; - int i; - - for (i = 0; i < ZBUD_MAX_BUDS; i++) - if (offset == offsetof(typeof(*zbpg), buddy[i])) { - budnum = i; - break; - } - BUG_ON(budnum == -1U); - return budnum; -} - -static char *zbud_data(struct zbud_hdr *zh, unsigned size) -{ - struct zbud_page *zbpg; - char *p; - unsigned budnum; - - ASSERT_SENTINEL(zh, ZBH); - budnum = zbud_budnum(zh); - BUG_ON(size == 0 || size > zbud_max_buddy_size()); - zbpg = container_of(zh, struct zbud_page, buddy[budnum]); - ASSERT_SPINLOCK(&zbpg->lock); - p = (char *)zbpg; - if (budnum == 0) - p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) & - CHUNK_MASK); - else if (budnum == 1) - p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); - return p; -} - -/* - * zbud raw page management - */ - -static struct zbud_page *zbud_alloc_raw_page(void) -{ - struct zbud_page *zbpg = NULL; - struct zbud_hdr *zh0, *zh1; - bool recycled = 0; - - /* if any pages on the zbpg list, use one */ - spin_lock(&zbpg_unused_list_spinlock); - if (!list_empty(&zbpg_unused_list)) { - zbpg = list_first_entry(&zbpg_unused_list, - struct zbud_page, bud_list); - list_del_init(&zbpg->bud_list); - zcache_zbpg_unused_list_count--; - recycled = 1; - } - spin_unlock(&zbpg_unused_list_spinlock); - if (zbpg == NULL) - /* none on zbpg list, try to get a kernel page */ - zbpg = zcache_get_free_page(); - if (likely(zbpg != 
NULL)) { - INIT_LIST_HEAD(&zbpg->bud_list); - zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; - spin_lock_init(&zbpg->lock); - if (recycled) { - ASSERT_INVERTED_SENTINEL(zbpg, ZBPG); - SET_SENTINEL(zbpg, ZBPG); - BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); - BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); - } else { - atomic_inc(&zcache_zbud_curr_raw_pages); - INIT_LIST_HEAD(&zbpg->bud_list); - SET_SENTINEL(zbpg, ZBPG); - zh0->size = 0; zh1->size = 0; - tmem_oid_set_invalid(&zh0->oid); - tmem_oid_set_invalid(&zh1->oid); - } - } - return zbpg; -} - -static void zbud_free_raw_page(struct zbud_page *zbpg) -{ - struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1]; - - ASSERT_SENTINEL(zbpg, ZBPG); - BUG_ON(!list_empty(&zbpg->bud_list)); - ASSERT_SPINLOCK(&zbpg->lock); - BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid)); - BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid)); - INVERT_SENTINEL(zbpg, ZBPG); - spin_unlock(&zbpg->lock); - spin_lock(&zbpg_unused_list_spinlock); - list_add(&zbpg->bud_list, &zbpg_unused_list); - zcache_zbpg_unused_list_count++; - spin_unlock(&zbpg_unused_list_spinlock); -} - -/* - * core zbud handling routines - */ - -static unsigned zbud_free(struct zbud_hdr *zh) -{ - unsigned size; - - ASSERT_SENTINEL(zh, ZBH); - BUG_ON(!tmem_oid_valid(&zh->oid)); - size = zh->size; - BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); - zh->size = 0; - tmem_oid_set_invalid(&zh->oid); - INVERT_SENTINEL(zh, ZBH); - zcache_zbud_curr_zbytes -= size; - atomic_dec(&zcache_zbud_curr_zpages); - return size; -} - -static void zbud_free_and_delist(struct zbud_hdr *zh) -{ - unsigned chunks; - struct zbud_hdr *zh_other; - unsigned budnum = zbud_budnum(zh), size; - struct zbud_page *zbpg = - container_of(zh, struct zbud_page, buddy[budnum]); - - spin_lock(&zbpg->lock); - if (list_empty(&zbpg->bud_list)) { - /* ignore zombie page... see zbud_evict_pages() */ - spin_unlock(&zbpg->lock); - return; - } - size = zbud_free(zh); - ASSERT_SPINLOCK(&zbpg->lock); - zh_other = &zbpg->buddy[(budnum == 0) ? 
1 : 0]; - if (zh_other->size == 0) { /* was unbuddied: unlist and free */ - chunks = zbud_size_to_chunks(size) ; - spin_lock(&zbud_budlists_spinlock); - BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); - list_del_init(&zbpg->bud_list); - zbud_unbuddied[chunks].count--; - spin_unlock(&zbud_budlists_spinlock); - zbud_free_raw_page(zbpg); - } else { /* was buddied: move remaining buddy to unbuddied list */ - chunks = zbud_size_to_chunks(zh_other->size) ; - spin_lock(&zbud_budlists_spinlock); - list_del_init(&zbpg->bud_list); - zcache_zbud_buddied_count--; - list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); - zbud_unbuddied[chunks].count++; - spin_unlock(&zbud_budlists_spinlock); - spin_unlock(&zbpg->lock); - } -} - -static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid, - uint32_t index, struct page *page, - void *cdata, unsigned size) -{ - struct zbud_hdr *zh0, *zh1, *zh = NULL; - struct zbud_page *zbpg = NULL, *ztmp; - unsigned nchunks; - char *to; - int i, found_good_buddy = 0; - - nchunks = zbud_size_to_chunks(size) ; - for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) { - spin_lock(&zbud_budlists_spinlock); - if (!list_empty(&zbud_unbuddied[i].list)) { - list_for_each_entry_safe(zbpg, ztmp, - &zbud_unbuddied[i].list, bud_list) { - if (spin_trylock(&zbpg->lock)) { - found_good_buddy = i; - goto found_unbuddied; - } - } - } - spin_unlock(&zbud_budlists_spinlock); - } - /* didn't find a good buddy, try allocating a new page */ - zbpg = zbud_alloc_raw_page(); - if (unlikely(zbpg == NULL)) - goto out; - /* ok, have a page, now compress the data before taking locks */ - spin_lock(&zbpg->lock); - spin_lock(&zbud_budlists_spinlock); - list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list); - zbud_unbuddied[nchunks].count++; - zh = &zbpg->buddy[0]; - goto init_zh; - -found_unbuddied: - ASSERT_SPINLOCK(&zbpg->lock); - zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1]; - BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0))); - if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */ - ASSERT_SENTINEL(zh0, ZBH); - zh = zh1; - } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */ - ASSERT_SENTINEL(zh1, ZBH); - zh = zh0; - } else - BUG(); - list_del_init(&zbpg->bud_list); - zbud_unbuddied[found_good_buddy].count--; - list_add_tail(&zbpg->bud_list, &zbud_buddied_list); - zcache_zbud_buddied_count++; - -init_zh: - SET_SENTINEL(zh, ZBH); - zh->size = size; - zh->index = index; - zh->oid = *oid; - zh->pool_id = pool_id; - /* can wait to copy the data until the list locks are dropped */ - spin_unlock(&zbud_budlists_spinlock); - - to = zbud_data(zh, size); - memcpy(to, cdata, size); - spin_unlock(&zbpg->lock); - zbud_cumul_chunk_counts[nchunks]++; - atomic_inc(&zcache_zbud_curr_zpages); - zcache_zbud_cumul_zpages++; - zcache_zbud_curr_zbytes += size; - zcache_zbud_cumul_zbytes += size; -out: - return zh; -} - -static int zbud_decompress(struct page *page, struct zbud_hdr *zh) -{ - struct zbud_page *zbpg; - unsigned budnum = zbud_budnum(zh); - size_t out_len = PAGE_SIZE; - char *to_va, *from_va; - unsigned size; - int ret = 0; - - zbpg = container_of(zh, struct zbud_page, buddy[budnum]); - spin_lock(&zbpg->lock); - if (list_empty(&zbpg->bud_list)) { - /* ignore zombie page... 
see zbud_evict_pages() */ - ret = -EINVAL; - goto out; - } - ASSERT_SENTINEL(zh, ZBH); - BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size()); - to_va = kmap_atomic(page, KM_USER0); - size = zh->size; - from_va = zbud_data(zh, size); - ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len); - BUG_ON(ret != LZO_E_OK); - BUG_ON(out_len != PAGE_SIZE); - kunmap_atomic(to_va, KM_USER0); -out: - spin_unlock(&zbpg->lock); - return ret; -} - -/* - * The following routines handle shrinking of ephemeral pages by evicting - * pages "least valuable" first. - */ - -static unsigned long zcache_evicted_raw_pages; -static unsigned long zcache_evicted_buddied_pages; -static unsigned long zcache_evicted_unbuddied_pages; - -static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid); -static void zcache_put_pool(struct tmem_pool *pool); - -/* - * Flush and free all zbuds in a zbpg, then free the pageframe - */ -static void zbud_evict_zbpg(struct zbud_page *zbpg) -{ - struct zbud_hdr *zh; - int i, j; - uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS]; - struct tmem_oid oid[ZBUD_MAX_BUDS]; - struct tmem_pool *pool; - - ASSERT_SPINLOCK(&zbpg->lock); - BUG_ON(!list_empty(&zbpg->bud_list)); - for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) { - zh = &zbpg->buddy[i]; - if (zh->size) { - pool_id[j] = zh->pool_id; - oid[j] = zh->oid; - index[j] = zh->index; - j++; - zbud_free(zh); - } - } - spin_unlock(&zbpg->lock); - for (i = 0; i < j; i++) { - pool = zcache_get_pool_by_id(pool_id[i]); - if (pool != NULL) { - tmem_flush_page(pool, &oid[i], index[i]); - zcache_put_pool(pool); - } - } - ASSERT_SENTINEL(zbpg, ZBPG); - spin_lock(&zbpg->lock); - zbud_free_raw_page(zbpg); -} - -/* - * Free nr pages. This code is funky because we want to hold the locks - * protecting various lists for as short a time as possible, and in some - * circumstances the list may change asynchronously when the list lock is - * not held. In some cases we also trylock not only to avoid waiting on a - * page in use by another cpu, but also to avoid potential deadlock due to - * lock inversion. 
- */ -static void zbud_evict_pages(int nr) -{ - struct zbud_page *zbpg; - int i; - - /* first try freeing any pages on unused list */ -retry_unused_list: - spin_lock_bh(&zbpg_unused_list_spinlock); - if (!list_empty(&zbpg_unused_list)) { - /* can't walk list here, since it may change when unlocked */ - zbpg = list_first_entry(&zbpg_unused_list, - struct zbud_page, bud_list); - list_del_init(&zbpg->bud_list); - zcache_zbpg_unused_list_count--; - atomic_dec(&zcache_zbud_curr_raw_pages); - spin_unlock_bh(&zbpg_unused_list_spinlock); - zcache_free_page(zbpg); - zcache_evicted_raw_pages++; - if (--nr <= 0) - goto out; - goto retry_unused_list; - } - spin_unlock_bh(&zbpg_unused_list_spinlock); - - /* now try freeing unbuddied pages, starting with least space avail */ - for (i = 0; i < MAX_CHUNK; i++) { -retry_unbud_list_i: - spin_lock_bh(&zbud_budlists_spinlock); - if (list_empty(&zbud_unbuddied[i].list)) { - spin_unlock_bh(&zbud_budlists_spinlock); - continue; - } - list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) { - if (unlikely(!spin_trylock(&zbpg->lock))) - continue; - list_del_init(&zbpg->bud_list); - zbud_unbuddied[i].count--; - spin_unlock(&zbud_budlists_spinlock); - zcache_evicted_unbuddied_pages++; - /* want budlists unlocked when doing zbpg eviction */ - zbud_evict_zbpg(zbpg); - local_bh_enable(); - if (--nr <= 0) - goto out; - goto retry_unbud_list_i; - } - spin_unlock_bh(&zbud_budlists_spinlock); - } - - /* as a last resort, free buddied pages */ -retry_bud_list: - spin_lock_bh(&zbud_budlists_spinlock); - if (list_empty(&zbud_buddied_list)) { - spin_unlock_bh(&zbud_budlists_spinlock); - goto out; - } - list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) { - if (unlikely(!spin_trylock(&zbpg->lock))) - continue; - list_del_init(&zbpg->bud_list); - zcache_zbud_buddied_count--; - spin_unlock(&zbud_budlists_spinlock); - zcache_evicted_buddied_pages++; - /* want budlists unlocked when doing zbpg eviction */ - zbud_evict_zbpg(zbpg); - local_bh_enable(); - if (--nr <= 0) - goto out; - goto retry_bud_list; - } - spin_unlock_bh(&zbud_budlists_spinlock); -out: - return; -} - -static void zbud_init(void) -{ - int i; - - INIT_LIST_HEAD(&zbud_buddied_list); - zcache_zbud_buddied_count = 0; - for (i = 0; i < NCHUNKS; i++) { - INIT_LIST_HEAD(&zbud_unbuddied[i].list); - zbud_unbuddied[i].count = 0; - } -} - -#ifdef CONFIG_SYSFS -/* - * These sysfs routines show a nice distribution of how many zbpg's are - * currently (and have ever been placed) in each unbuddied list. It's fun - * to watch but can probably go away before final merge. 
- */ -static int zbud_show_unbuddied_list_counts(char *buf) -{ - int i; - char *p = buf; - - for (i = 0; i < NCHUNKS - 1; i++) - p += sprintf(p, "%u ", zbud_unbuddied[i].count); - p += sprintf(p, "%d\n", zbud_unbuddied[i].count); - return p - buf; -} - -static int zbud_show_cumul_chunk_counts(char *buf) -{ - unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0; - unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0; - unsigned long total_chunks_lte_42 = 0; - char *p = buf; - - for (i = 0; i < NCHUNKS; i++) { - p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]); - chunks += zbud_cumul_chunk_counts[i]; - total_chunks += zbud_cumul_chunk_counts[i]; - sum_total_chunks += i * zbud_cumul_chunk_counts[i]; - if (i == 21) - total_chunks_lte_21 = total_chunks; - if (i == 32) - total_chunks_lte_32 = total_chunks; - if (i == 42) - total_chunks_lte_42 = total_chunks; - } - p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n", - total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42, - chunks == 0 ? 0 : sum_total_chunks / chunks); - return p - buf; -} -#endif - -/********** - * This "zv" PAM implementation combines the TLSF-based xvMalloc - * with lzo1x compression to maximize the amount of data that can - * be packed into a physical page. - * - * Zv represents a PAM page with the index and object (plus a "size" value - * necessary for decompression) immediately preceding the compressed data. - */ - -#define ZVH_SENTINEL 0x43214321 - -struct zv_hdr { - uint32_t pool_id; - struct tmem_oid oid; - uint32_t index; - DECL_SENTINEL -}; - -static const int zv_max_page_size = (PAGE_SIZE / 8) * 7; - -static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id, - struct tmem_oid *oid, uint32_t index, - void *cdata, unsigned clen) -{ - struct page *page; - struct zv_hdr *zv = NULL; - uint32_t offset; - int ret; - - BUG_ON(!irqs_disabled()); - ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr), - &page, &offset, ZCACHE_GFP_MASK); - if (unlikely(ret)) - goto out; - zv = kmap_atomic(page, KM_USER0) + offset; - zv->index = index; - zv->oid = *oid; - zv->pool_id = pool_id; - SET_SENTINEL(zv, ZVH); - memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); - kunmap_atomic(zv, KM_USER0); -out: - return zv; -} - -static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv) -{ - unsigned long flags; - struct page *page; - uint32_t offset; - uint16_t size; - - ASSERT_SENTINEL(zv, ZVH); - size = xv_get_object_size(zv) - sizeof(*zv); - BUG_ON(size == 0 || size > zv_max_page_size); - INVERT_SENTINEL(zv, ZVH); - page = virt_to_page(zv); - offset = (unsigned long)zv & ~PAGE_MASK; - local_irq_save(flags); - xv_free(xvpool, page, offset); - local_irq_restore(flags); -} - -static void zv_decompress(struct page *page, struct zv_hdr *zv) -{ - size_t clen = PAGE_SIZE; - char *to_va; - unsigned size; - int ret; - - ASSERT_SENTINEL(zv, ZVH); - size = xv_get_object_size(zv) - sizeof(*zv); - BUG_ON(size == 0 || size > zv_max_page_size); - to_va = kmap_atomic(page, KM_USER0); - ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv), - size, to_va, &clen); - kunmap_atomic(to_va, KM_USER0); - BUG_ON(ret != LZO_E_OK); - BUG_ON(clen != PAGE_SIZE); -} - -/* - * zcache core code starts here - */ - -/* useful stats not collected by cleancache or frontswap */ -static unsigned long zcache_flush_total; -static unsigned long zcache_flush_found; -static unsigned long zcache_flobj_total; -static unsigned long zcache_flobj_found; -static unsigned long zcache_failed_eph_puts; -static unsigned long 
zcache_failed_pers_puts; - -#define MAX_POOLS_PER_CLIENT 16 - -static struct { - struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; - struct xv_pool *xvpool; -} zcache_client; - -/* - * Tmem operations assume the poolid implies the invoking client. - * Zcache only has one client (the kernel itself), so translate - * the poolid into the tmem_pool allocated for it. A KVM version - * of zcache would have one client per guest and each client might - * have a poolid==N. - */ -static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid) -{ - struct tmem_pool *pool = NULL; - - if (poolid >= 0) { - pool = zcache_client.tmem_pools[poolid]; - if (pool != NULL) - atomic_inc(&pool->refcount); - } - return pool; -} - -static void zcache_put_pool(struct tmem_pool *pool) -{ - if (pool != NULL) - atomic_dec(&pool->refcount); -} - -/* counters for debugging */ -static unsigned long zcache_failed_get_free_pages; -static unsigned long zcache_failed_alloc; -static unsigned long zcache_put_to_flush; -static unsigned long zcache_aborted_preload; -static unsigned long zcache_aborted_shrink; - -/* - * Ensure that memory allocation requests in zcache don't result - * in direct reclaim requests via the shrinker, which would cause - * an infinite loop. Maybe a GFP flag would be better? - */ -static DEFINE_SPINLOCK(zcache_direct_reclaim_lock); - -/* - * for now, used named slabs so can easily track usage; later can - * either just use kmalloc, or perhaps add a slab-like allocator - * to more carefully manage total memory utilization - */ -static struct kmem_cache *zcache_objnode_cache; -static struct kmem_cache *zcache_obj_cache; -static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0); -static unsigned long zcache_curr_obj_count_max; -static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0); -static unsigned long zcache_curr_objnode_count_max; - -/* - * to avoid memory allocation recursion (e.g. 
due to direct reclaim), we - * preload all necessary data structures so the hostops callbacks never - * actually do a malloc - */ -struct zcache_preload { - void *page; - struct tmem_obj *obj; - int nr; - struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH]; -}; -static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, }; - -static int zcache_do_preload(struct tmem_pool *pool) -{ - struct zcache_preload *kp; - struct tmem_objnode *objnode; - struct tmem_obj *obj; - void *page; - int ret = -ENOMEM; - - if (unlikely(zcache_objnode_cache == NULL)) - goto out; - if (unlikely(zcache_obj_cache == NULL)) - goto out; - if (!spin_trylock(&zcache_direct_reclaim_lock)) { - zcache_aborted_preload++; - goto out; - } - preempt_disable(); - kp = &__get_cpu_var(zcache_preloads); - while (kp->nr < ARRAY_SIZE(kp->objnodes)) { - preempt_enable_no_resched(); - objnode = kmem_cache_alloc(zcache_objnode_cache, - ZCACHE_GFP_MASK); - if (unlikely(objnode == NULL)) { - zcache_failed_alloc++; - goto unlock_out; - } - preempt_disable(); - kp = &__get_cpu_var(zcache_preloads); - if (kp->nr < ARRAY_SIZE(kp->objnodes)) - kp->objnodes[kp->nr++] = objnode; - else - kmem_cache_free(zcache_objnode_cache, objnode); - } - preempt_enable_no_resched(); - obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); - if (unlikely(obj == NULL)) { - zcache_failed_alloc++; - goto unlock_out; - } - page = (void *)__get_free_page(ZCACHE_GFP_MASK); - if (unlikely(page == NULL)) { - zcache_failed_get_free_pages++; - kmem_cache_free(zcache_obj_cache, obj); - goto unlock_out; - } - preempt_disable(); - kp = &__get_cpu_var(zcache_preloads); - if (kp->obj == NULL) - kp->obj = obj; - else - kmem_cache_free(zcache_obj_cache, obj); - if (kp->page == NULL) - kp->page = page; - else - free_page((unsigned long)page); - ret = 0; -unlock_out: - spin_unlock(&zcache_direct_reclaim_lock); -out: - return ret; -} - -static void *zcache_get_free_page(void) -{ - struct zcache_preload *kp; - void *page; - - kp = &__get_cpu_var(zcache_preloads); - page = kp->page; - BUG_ON(page == NULL); - kp->page = NULL; - return page; -} - -static void zcache_free_page(void *p) -{ - free_page((unsigned long)p); -} - -/* - * zcache implementation for tmem host ops - */ - -static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool) -{ - struct tmem_objnode *objnode = NULL; - unsigned long count; - struct zcache_preload *kp; - - kp = &__get_cpu_var(zcache_preloads); - if (kp->nr <= 0) - goto out; - objnode = kp->objnodes[kp->nr - 1]; - BUG_ON(objnode == NULL); - kp->objnodes[kp->nr - 1] = NULL; - kp->nr--; - count = atomic_inc_return(&zcache_curr_objnode_count); - if (count > zcache_curr_objnode_count_max) - zcache_curr_objnode_count_max = count; -out: - return objnode; -} - -static void zcache_objnode_free(struct tmem_objnode *objnode, - struct tmem_pool *pool) -{ - atomic_dec(&zcache_curr_objnode_count); - BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0); - kmem_cache_free(zcache_objnode_cache, objnode); -} - -static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool) -{ - struct tmem_obj *obj = NULL; - unsigned long count; - struct zcache_preload *kp; - - kp = &__get_cpu_var(zcache_preloads); - obj = kp->obj; - BUG_ON(obj == NULL); - kp->obj = NULL; - count = atomic_inc_return(&zcache_curr_obj_count); - if (count > zcache_curr_obj_count_max) - zcache_curr_obj_count_max = count; - return obj; -} - -static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool) -{ - atomic_dec(&zcache_curr_obj_count); - 
BUG_ON(atomic_read(&zcache_curr_obj_count) < 0); - kmem_cache_free(zcache_obj_cache, obj); -} - -static struct tmem_hostops zcache_hostops = { - .obj_alloc = zcache_obj_alloc, - .obj_free = zcache_obj_free, - .objnode_alloc = zcache_objnode_alloc, - .objnode_free = zcache_objnode_free, -}; - -/* - * zcache implementations for PAM page descriptor ops - */ - -static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0); -static unsigned long zcache_curr_eph_pampd_count_max; -static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0); -static unsigned long zcache_curr_pers_pampd_count_max; - -/* forward reference */ -static int zcache_compress(struct page *from, void **out_va, size_t *out_len); - -static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid, - uint32_t index, struct page *page) -{ - void *pampd = NULL, *cdata; - size_t clen; - int ret; - bool ephemeral = is_ephemeral(pool); - unsigned long count; - - if (ephemeral) { - ret = zcache_compress(page, &cdata, &clen); - if (ret == 0) - - goto out; - if (clen == 0 || clen > zbud_max_buddy_size()) { - zcache_compress_poor++; - goto out; - } - pampd = (void *)zbud_create(pool->pool_id, oid, index, - page, cdata, clen); - if (pampd != NULL) { - count = atomic_inc_return(&zcache_curr_eph_pampd_count); - if (count > zcache_curr_eph_pampd_count_max) - zcache_curr_eph_pampd_count_max = count; - } - } else { - /* - * FIXME: This is all the "policy" there is for now. - * 3/4 totpages should allow ~37% of RAM to be filled with - * compressed frontswap pages - */ - if (atomic_read(&zcache_curr_pers_pampd_count) > - 3 * totalram_pages / 4) - goto out; - ret = zcache_compress(page, &cdata, &clen); - if (ret == 0) - goto out; - if (clen > zv_max_page_size) { - zcache_compress_poor++; - goto out; - } - pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id, - oid, index, cdata, clen); - if (pampd == NULL) - goto out; - count = atomic_inc_return(&zcache_curr_pers_pampd_count); - if (count > zcache_curr_pers_pampd_count_max) - zcache_curr_pers_pampd_count_max = count; - } -out: - return pampd; -} - -/* - * fill the pageframe corresponding to the struct page with the data - * from the passed pampd - */ -static int zcache_pampd_get_data(struct page *page, void *pampd, - struct tmem_pool *pool) -{ - int ret = 0; - - if (is_ephemeral(pool)) - ret = zbud_decompress(page, pampd); - else - zv_decompress(page, pampd); - return ret; -} - -/* - * free the pampd and remove it from any zcache lists - * pampd must no longer be pointed to from any tmem data structures! 
- */ -static void zcache_pampd_free(void *pampd, struct tmem_pool *pool) -{ - if (is_ephemeral(pool)) { - zbud_free_and_delist((struct zbud_hdr *)pampd); - atomic_dec(&zcache_curr_eph_pampd_count); - BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0); - } else { - zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd); - atomic_dec(&zcache_curr_pers_pampd_count); - BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0); - } -} - -static struct tmem_pamops zcache_pamops = { - .create = zcache_pampd_create, - .get_data = zcache_pampd_get_data, - .free = zcache_pampd_free, -}; - -/* - * zcache compression/decompression and related per-cpu stuff - */ - -#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS -#define LZO_DSTMEM_PAGE_ORDER 1 -static DEFINE_PER_CPU(unsigned char *, zcache_workmem); -static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); - -static int zcache_compress(struct page *from, void **out_va, size_t *out_len) -{ - int ret = 0; - unsigned char *dmem = __get_cpu_var(zcache_dstmem); - unsigned char *wmem = __get_cpu_var(zcache_workmem); - char *from_va; - - BUG_ON(!irqs_disabled()); - if (unlikely(dmem == NULL || wmem == NULL)) - goto out; /* no buffer, so can't compress */ - from_va = kmap_atomic(from, KM_USER0); - mb(); - ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem); - BUG_ON(ret != LZO_E_OK); - *out_va = dmem; - kunmap_atomic(from_va, KM_USER0); - ret = 1; -out: - return ret; -} - - -static int zcache_cpu_notifier(struct notifier_block *nb, - unsigned long action, void *pcpu) -{ - int cpu = (long)pcpu; - struct zcache_preload *kp; - - switch (action) { - case CPU_UP_PREPARE: - per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( - GFP_KERNEL | __GFP_REPEAT, - LZO_DSTMEM_PAGE_ORDER), - per_cpu(zcache_workmem, cpu) = - kzalloc(LZO1X_MEM_COMPRESS, - GFP_KERNEL | __GFP_REPEAT); - break; - case CPU_DEAD: - case CPU_UP_CANCELED: - free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), - LZO_DSTMEM_PAGE_ORDER); - per_cpu(zcache_dstmem, cpu) = NULL; - kfree(per_cpu(zcache_workmem, cpu)); - per_cpu(zcache_workmem, cpu) = NULL; - kp = &per_cpu(zcache_preloads, cpu); - while (kp->nr) { - kmem_cache_free(zcache_objnode_cache, - kp->objnodes[kp->nr - 1]); - kp->objnodes[kp->nr - 1] = NULL; - kp->nr--; - } - kmem_cache_free(zcache_obj_cache, kp->obj); - free_page((unsigned long)kp->page); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block zcache_cpu_notifier_block = { - .notifier_call = zcache_cpu_notifier -}; - -#ifdef CONFIG_SYSFS -#define ZCACHE_SYSFS_RO(_name) \ - static ssize_t zcache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%lu\n", zcache_##_name); \ - } \ - static struct kobj_attribute zcache_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = zcache_##_name##_show, \ - } - -#define ZCACHE_SYSFS_RO_ATOMIC(_name) \ - static ssize_t zcache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \ - } \ - static struct kobj_attribute zcache_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = zcache_##_name##_show, \ - } - -#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \ - static ssize_t zcache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return _func(buf); \ - } \ - static struct kobj_attribute zcache_##_name##_attr = { \ - .attr = { .name = 
__stringify(_name), .mode = 0444 }, \ - .show = zcache_##_name##_show, \ - } - -ZCACHE_SYSFS_RO(curr_obj_count_max); -ZCACHE_SYSFS_RO(curr_objnode_count_max); -ZCACHE_SYSFS_RO(flush_total); -ZCACHE_SYSFS_RO(flush_found); -ZCACHE_SYSFS_RO(flobj_total); -ZCACHE_SYSFS_RO(flobj_found); -ZCACHE_SYSFS_RO(failed_eph_puts); -ZCACHE_SYSFS_RO(failed_pers_puts); -ZCACHE_SYSFS_RO(zbud_curr_zbytes); -ZCACHE_SYSFS_RO(zbud_cumul_zpages); -ZCACHE_SYSFS_RO(zbud_cumul_zbytes); -ZCACHE_SYSFS_RO(zbud_buddied_count); -ZCACHE_SYSFS_RO(zbpg_unused_list_count); -ZCACHE_SYSFS_RO(evicted_raw_pages); -ZCACHE_SYSFS_RO(evicted_unbuddied_pages); -ZCACHE_SYSFS_RO(evicted_buddied_pages); -ZCACHE_SYSFS_RO(failed_get_free_pages); -ZCACHE_SYSFS_RO(failed_alloc); -ZCACHE_SYSFS_RO(put_to_flush); -ZCACHE_SYSFS_RO(aborted_preload); -ZCACHE_SYSFS_RO(aborted_shrink); -ZCACHE_SYSFS_RO(compress_poor); -ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages); -ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages); -ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count); -ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count); -ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts, - zbud_show_unbuddied_list_counts); -ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts, - zbud_show_cumul_chunk_counts); - -static struct attribute *zcache_attrs[] = { - &zcache_curr_obj_count_attr.attr, - &zcache_curr_obj_count_max_attr.attr, - &zcache_curr_objnode_count_attr.attr, - &zcache_curr_objnode_count_max_attr.attr, - &zcache_flush_total_attr.attr, - &zcache_flobj_total_attr.attr, - &zcache_flush_found_attr.attr, - &zcache_flobj_found_attr.attr, - &zcache_failed_eph_puts_attr.attr, - &zcache_failed_pers_puts_attr.attr, - &zcache_compress_poor_attr.attr, - &zcache_zbud_curr_raw_pages_attr.attr, - &zcache_zbud_curr_zpages_attr.attr, - &zcache_zbud_curr_zbytes_attr.attr, - &zcache_zbud_cumul_zpages_attr.attr, - &zcache_zbud_cumul_zbytes_attr.attr, - &zcache_zbud_buddied_count_attr.attr, - &zcache_zbpg_unused_list_count_attr.attr, - &zcache_evicted_raw_pages_attr.attr, - &zcache_evicted_unbuddied_pages_attr.attr, - &zcache_evicted_buddied_pages_attr.attr, - &zcache_failed_get_free_pages_attr.attr, - &zcache_failed_alloc_attr.attr, - &zcache_put_to_flush_attr.attr, - &zcache_aborted_preload_attr.attr, - &zcache_aborted_shrink_attr.attr, - &zcache_zbud_unbuddied_list_counts_attr.attr, - &zcache_zbud_cumul_chunk_counts_attr.attr, - NULL, -}; - -static struct attribute_group zcache_attr_group = { - .attrs = zcache_attrs, - .name = "zcache", -}; - -#endif /* CONFIG_SYSFS */ -/* - * When zcache is disabled ("frozen"), pools can be created and destroyed, - * but all puts (and thus all other operations that require memory allocation) - * must fail. If zcache is unfrozen, accepts puts, then frozen again, - * data consistency requires all puts while frozen to be converted into - * flushes. - */ -static bool zcache_freeze; - -/* - * zcache shrinker interface (only useful for ephemeral pages, so zbud only) - */ -static int shrink_zcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) -{ - int ret = -1; - - if (nr >= 0) { - if (!(gfp_mask & __GFP_FS)) - /* does this case really need to be skipped? 
*/ - goto out; - if (spin_trylock(&zcache_direct_reclaim_lock)) { - zbud_evict_pages(nr); - spin_unlock(&zcache_direct_reclaim_lock); - } else - zcache_aborted_shrink++; - } - ret = (int)atomic_read(&zcache_zbud_curr_raw_pages); -out: - return ret; -} - -static struct shrinker zcache_shrinker = { - .shrink = shrink_zcache_memory, - .seeks = DEFAULT_SEEKS, -}; - -/* - * zcache shims between cleancache/frontswap ops and tmem - */ - -static int zcache_put_page(int pool_id, struct tmem_oid *oidp, - uint32_t index, struct page *page) -{ - struct tmem_pool *pool; - int ret = -1; - - BUG_ON(!irqs_disabled()); - pool = zcache_get_pool_by_id(pool_id); - if (unlikely(pool == NULL)) - goto out; - if (!zcache_freeze && zcache_do_preload(pool) == 0) { - /* preload does preempt_disable on success */ - ret = tmem_put(pool, oidp, index, page); - if (ret < 0) { - if (is_ephemeral(pool)) - zcache_failed_eph_puts++; - else - zcache_failed_pers_puts++; - } - zcache_put_pool(pool); - preempt_enable_no_resched(); - } else { - zcache_put_to_flush++; - if (atomic_read(&pool->obj_count) > 0) - /* the put fails whether the flush succeeds or not */ - (void)tmem_flush_page(pool, oidp, index); - zcache_put_pool(pool); - } -out: - return ret; -} - -static int zcache_get_page(int pool_id, struct tmem_oid *oidp, - uint32_t index, struct page *page) -{ - struct tmem_pool *pool; - int ret = -1; - unsigned long flags; - - local_irq_save(flags); - pool = zcache_get_pool_by_id(pool_id); - if (likely(pool != NULL)) { - if (atomic_read(&pool->obj_count) > 0) - ret = tmem_get(pool, oidp, index, page); - zcache_put_pool(pool); - } - local_irq_restore(flags); - return ret; -} - -static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index) -{ - struct tmem_pool *pool; - int ret = -1; - unsigned long flags; - - local_irq_save(flags); - zcache_flush_total++; - pool = zcache_get_pool_by_id(pool_id); - if (likely(pool != NULL)) { - if (atomic_read(&pool->obj_count) > 0) - ret = tmem_flush_page(pool, oidp, index); - zcache_put_pool(pool); - } - if (ret >= 0) - zcache_flush_found++; - local_irq_restore(flags); - return ret; -} - -static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) -{ - struct tmem_pool *pool; - int ret = -1; - unsigned long flags; - - local_irq_save(flags); - zcache_flobj_total++; - pool = zcache_get_pool_by_id(pool_id); - if (likely(pool != NULL)) { - if (atomic_read(&pool->obj_count) > 0) - ret = tmem_flush_object(pool, oidp); - zcache_put_pool(pool); - } - if (ret >= 0) - zcache_flobj_found++; - local_irq_restore(flags); - return ret; -} - -static int zcache_destroy_pool(int pool_id) -{ - struct tmem_pool *pool = NULL; - int ret = -1; - - if (pool_id < 0) - goto out; - pool = zcache_client.tmem_pools[pool_id]; - if (pool == NULL) - goto out; - zcache_client.tmem_pools[pool_id] = NULL; - /* wait for pool activity on other cpus to quiesce */ - while (atomic_read(&pool->refcount) != 0) - ; - local_bh_disable(); - ret = tmem_destroy_pool(pool); - local_bh_enable(); - kfree(pool); - pr_info("zcache: destroyed pool id=%d\n", pool_id); -out: - return ret; -} - -static int zcache_new_pool(uint32_t flags) -{ - int poolid = -1; - struct tmem_pool *pool; - - pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); - if (pool == NULL) { - pr_info("zcache: pool creation failed: out of memory\n"); - goto out; - } - - for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) - if (zcache_client.tmem_pools[poolid] == NULL) - break; - if (poolid >= MAX_POOLS_PER_CLIENT) { - pr_info("zcache: pool 
creation failed: max exceeded\n"); - kfree(pool); - poolid = -1; - goto out; - } - atomic_set(&pool->refcount, 0); - pool->client = &zcache_client; - pool->pool_id = poolid; - tmem_new_pool(pool, flags); - zcache_client.tmem_pools[poolid] = pool; - pr_info("zcache: created %s tmem pool, id=%d\n", - flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral", - poolid); -out: - return poolid; -} - -/********** - * Two kernel functionalities currently can be layered on top of tmem. - * These are "cleancache" which is used as a second-chance cache for clean - * page cache pages; and "frontswap" which is used for swap pages - * to avoid writes to disk. A generic "shim" is provided here for each - * to translate in-kernel semantics to zcache semantics. - */ - -#ifdef CONFIG_CLEANCACHE -static void zcache_cleancache_put_page(int pool_id, - struct cleancache_filekey key, - pgoff_t index, struct page *page) -{ - u32 ind = (u32) index; - struct tmem_oid oid = *(struct tmem_oid *)&key; - - if (likely(ind == index)) - (void)zcache_put_page(pool_id, &oid, index, page); -} - -static int zcache_cleancache_get_page(int pool_id, - struct cleancache_filekey key, - pgoff_t index, struct page *page) -{ - u32 ind = (u32) index; - struct tmem_oid oid = *(struct tmem_oid *)&key; - int ret = -1; - - if (likely(ind == index)) - ret = zcache_get_page(pool_id, &oid, index, page); - return ret; -} - -static void zcache_cleancache_flush_page(int pool_id, - struct cleancache_filekey key, - pgoff_t index) -{ - u32 ind = (u32) index; - struct tmem_oid oid = *(struct tmem_oid *)&key; - - if (likely(ind == index)) - (void)zcache_flush_page(pool_id, &oid, ind); -} - -static void zcache_cleancache_flush_inode(int pool_id, - struct cleancache_filekey key) -{ - struct tmem_oid oid = *(struct tmem_oid *)&key; - - (void)zcache_flush_object(pool_id, &oid); -} - -static void zcache_cleancache_flush_fs(int pool_id) -{ - if (pool_id >= 0) - (void)zcache_destroy_pool(pool_id); -} - -static int zcache_cleancache_init_fs(size_t pagesize) -{ - BUG_ON(sizeof(struct cleancache_filekey) != - sizeof(struct tmem_oid)); - BUG_ON(pagesize != PAGE_SIZE); - return zcache_new_pool(0); -} - -static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) -{ - /* shared pools are unsupported and map to private */ - BUG_ON(sizeof(struct cleancache_filekey) != - sizeof(struct tmem_oid)); - BUG_ON(pagesize != PAGE_SIZE); - return zcache_new_pool(0); -} - -static struct cleancache_ops zcache_cleancache_ops = { - .put_page = zcache_cleancache_put_page, - .get_page = zcache_cleancache_get_page, - .flush_page = zcache_cleancache_flush_page, - .flush_inode = zcache_cleancache_flush_inode, - .flush_fs = zcache_cleancache_flush_fs, - .init_shared_fs = zcache_cleancache_init_shared_fs, - .init_fs = zcache_cleancache_init_fs -}; - -struct cleancache_ops zcache_cleancache_register_ops(void) -{ - struct cleancache_ops old_ops = - cleancache_register_ops(&zcache_cleancache_ops); - - return old_ops; -} -#endif - -#ifdef CONFIG_FRONTSWAP -/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ -static int zcache_frontswap_poolid = -1; - -/* - * Swizzling increases objects per swaptype, increasing tmem concurrency - * for heavy swaploads. 
Later, larger nr_cpus -> larger SWIZ_BITS - */ -#define SWIZ_BITS 4 -#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) -#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) -#define iswiz(_ind) (_ind >> SWIZ_BITS) - -static inline struct tmem_oid oswiz(unsigned type, u32 ind) -{ - struct tmem_oid oid = { .oid = { 0 } }; - oid.oid[0] = _oswiz(type, ind); - return oid; -} - -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, - struct page *page) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - struct tmem_oid oid = oswiz(type, ind); - int ret = -1; - unsigned long flags; - - BUG_ON(!PageLocked(page)); - if (likely(ind64 == ind)) { - local_irq_save(flags); - ret = zcache_put_page(zcache_frontswap_poolid, &oid, - iswiz(ind), page); - local_irq_restore(flags); - } - return ret; -} - -/* returns 0 if the page was successfully gotten from frontswap, -1 if - * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, - struct page *page) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - struct tmem_oid oid = oswiz(type, ind); - int ret = -1; - - BUG_ON(!PageLocked(page)); - if (likely(ind64 == ind)) - ret = zcache_get_page(zcache_frontswap_poolid, &oid, - iswiz(ind), page); - return ret; -} - -/* flush a single page from frontswap */ -static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) -{ - u64 ind64 = (u64)offset; - u32 ind = (u32)offset; - struct tmem_oid oid = oswiz(type, ind); - - if (likely(ind64 == ind)) - (void)zcache_flush_page(zcache_frontswap_poolid, &oid, - iswiz(ind)); -} - -/* flush all pages from the passed swaptype */ -static void zcache_frontswap_flush_area(unsigned type) -{ - struct tmem_oid oid; - int ind; - - for (ind = SWIZ_MASK; ind >= 0; ind--) { - oid = oswiz(type, ind); - (void)zcache_flush_object(zcache_frontswap_poolid, &oid); - } -} - -static void zcache_frontswap_init(unsigned ignored) -{ - /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ - if (zcache_frontswap_poolid < 0) - zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST); -} - -static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, - .flush_page = zcache_frontswap_flush_page, - .flush_area = zcache_frontswap_flush_area, - .init = zcache_frontswap_init -}; - -struct frontswap_ops zcache_frontswap_register_ops(void) -{ - struct frontswap_ops old_ops = - frontswap_register_ops(&zcache_frontswap_ops); - - return old_ops; -} -#endif - -/* - * zcache initialization - * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR - * NOTHING HAPPENS! 
- */ - -static int zcache_enabled; - -static int __init enable_zcache(char *s) -{ - zcache_enabled = 1; - return 1; -} -__setup("zcache", enable_zcache); - -/* allow independent dynamic disabling of cleancache and frontswap */ - -static int use_cleancache = 1; - -static int __init no_cleancache(char *s) -{ - use_cleancache = 0; - return 1; -} - -__setup("nocleancache", no_cleancache); - -static int use_frontswap = 1; - -static int __init no_frontswap(char *s) -{ - use_frontswap = 0; - return 1; -} - -__setup("nofrontswap", no_frontswap); - -static int __init zcache_init(void) -{ -#ifdef CONFIG_SYSFS - int ret = 0; - - ret = sysfs_create_group(mm_kobj, &zcache_attr_group); - if (ret) { - pr_err("zcache: can't create sysfs\n"); - goto out; - } -#endif /* CONFIG_SYSFS */ -#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP) - if (zcache_enabled) { - unsigned int cpu; - - tmem_register_hostops(&zcache_hostops); - tmem_register_pamops(&zcache_pamops); - ret = register_cpu_notifier(&zcache_cpu_notifier_block); - if (ret) { - pr_err("zcache: can't register cpu notifier\n"); - goto out; - } - for_each_online_cpu(cpu) { - void *pcpu = (void *)(long)cpu; - zcache_cpu_notifier(&zcache_cpu_notifier_block, - CPU_UP_PREPARE, pcpu); - } - } - zcache_objnode_cache = kmem_cache_create("zcache_objnode", - sizeof(struct tmem_objnode), 0, 0, NULL); - zcache_obj_cache = kmem_cache_create("zcache_obj", - sizeof(struct tmem_obj), 0, 0, NULL); -#endif -#ifdef CONFIG_CLEANCACHE - if (zcache_enabled && use_cleancache) { - struct cleancache_ops old_ops; - - zbud_init(); - register_shrinker(&zcache_shrinker); - old_ops = zcache_cleancache_register_ops(); - pr_info("zcache: cleancache enabled using kernel " - "transcendent memory and compression buddies\n"); - if (old_ops.init_fs != NULL) - pr_warning("zcache: cleancache_ops overridden"); - } -#endif -#ifdef CONFIG_FRONTSWAP - if (zcache_enabled && use_frontswap) { - struct frontswap_ops old_ops; - - zcache_client.xvpool = xv_create_pool(); - if (zcache_client.xvpool == NULL) { - pr_err("zcache: can't create xvpool\n"); - goto out; - } - old_ops = zcache_frontswap_register_ops(); - pr_info("zcache: frontswap enabled using kernel " - "transcendent memory and xvmalloc\n"); - if (old_ops.init != NULL) - pr_warning("ktmem: frontswap_ops overridden"); - } -#endif -out: - return ret; -} - -module_init(zcache_init) From 5c726afbb6f0983468eec87d86147c91d94b4430 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 20 Aug 2012 23:15:00 +0800 Subject: [PATCH 115/155] Revert "mm: cleancache core ops functions and config" This reverts commit e0c9143ea1ec510a41b347be043e98034eedf5c8. --- Documentation/vm/cleancache.txt | 279 -------------------------------- include/linux/cleancache.h | 122 -------------- mm/Kconfig | 22 --- mm/Makefile | 1 - mm/cleancache.c | 244 ---------------------------- 5 files changed, 668 deletions(-) delete mode 100755 Documentation/vm/cleancache.txt delete mode 100755 include/linux/cleancache.h mode change 100755 => 100644 mm/Kconfig mode change 100755 => 100644 mm/Makefile delete mode 100755 mm/cleancache.c diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt deleted file mode 100755 index e0a53567..00000000 --- a/Documentation/vm/cleancache.txt +++ /dev/null @@ -1,279 +0,0 @@ -MOTIVATION - -Cleancache is a new optional feature provided by the VFS layer that -potentially dramatically increases page cache effectiveness for -many workloads in many environments at a negligible cost. 
- -Cleancache can be thought of as a page-granularity victim cache for clean -pages that the kernel's pageframe replacement algorithm (PFRA) would like -to keep around, but can't since there isn't enough memory. So when the -PFRA "evicts" a page, it first attempts to use cleancache code to -put the data contained in that page into "transcendent memory", memory -that is not directly accessible or addressable by the kernel and is -of unknown and possibly time-varying size. - -Later, when a cleancache-enabled filesystem wishes to access a page -in a file on disk, it first checks cleancache to see if it already -contains it; if it does, the page of data is copied into the kernel -and a disk access is avoided. - -Transcendent memory "drivers" for cleancache are currently implemented -in Xen (using hypervisor memory) and zcache (using in-kernel compressed -memory) and other implementations are in development. - -FAQs are included below. - -IMPLEMENTATION OVERVIEW - -A cleancache "backend" that provides transcendent memory registers itself -to the kernel's cleancache "frontend" by calling cleancache_register_ops, -passing a pointer to a cleancache_ops structure with funcs set appropriately. -Note that cleancache_register_ops returns the previous settings so that -chaining can be performed if desired. The functions provided must conform to -certain semantics as follows: - -Most important, cleancache is "ephemeral". Pages which are copied into -cleancache have an indefinite lifetime which is completely unknowable -by the kernel and so may or may not still be in cleancache at any later time. -Thus, as its name implies, cleancache is not suitable for dirty pages. -Cleancache has complete discretion over what pages to preserve and what -pages to discard and when. - -Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a -pool id which, if positive, must be saved in the filesystem's superblock; -a negative return value indicates failure. A "put_page" will copy a -(presumably about-to-be-evicted) page into cleancache and associate it with -the pool id, a file key, and a page index into the file. (The combination -of a pool id, a file key, and an index is sometimes called a "handle".) -A "get_page" will copy the page, if found, from cleancache into kernel memory. -An "invalidate_page" will ensure the page no longer is present in cleancache; -an "invalidate_inode" will invalidate all pages associated with the specified -file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate -all pages in all files specified by the given pool id and also surrender -the pool id. - -An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache -to treat the pool as shared using a 128-bit UUID as a key. On systems -that may run multiple kernels (such as hard partitioned or virtualized -systems) that may share a clustered filesystem, and where cleancache -may be shared among those kernels, calls to init_shared_fs that specify the -same UUID will receive the same pool id, thus allowing the pages to -be shared. Note that any security requirements must be imposed outside -of the kernel (e.g. by "tools" that control cleancache). Or a -cleancache implementation can simply disable shared_init by always -returning a negative value. - -If a get_page is successful on a non-shared pool, the page is invalidated -(thus making cleancache an "exclusive" cache). On a shared pool, the page -is NOT invalidated on a successful get_page so that it remains accessible to -other sharers. 
The kernel is responsible for ensuring coherency between -cleancache (shared or not), the page cache, and the filesystem, using -cleancache invalidate operations as required. - -Note that cleancache must enforce put-put-get coherency and get-get -coherency. For the former, if two puts are made to the same handle but -with different data, say AAA by the first put and BBB by the second, a -subsequent get can never return the stale data (AAA). For get-get coherency, -if a get for a given handle fails, subsequent gets for that handle will -never succeed unless preceded by a successful put with that handle. - -Last, cleancache provides no SMP serialization guarantees; if two -different Linux threads are simultaneously putting and invalidating a page -with the same handle, the results are indeterminate. Callers must -lock the page to ensure serial behavior. - -CLEANCACHE PERFORMANCE METRICS - -Cleancache monitoring is done by sysfs files in the -/sys/kernel/mm/cleancache directory. The effectiveness of cleancache -can be measured (across all filesystems) with: - -succ_gets - number of gets that were successful -failed_gets - number of gets that failed -puts - number of puts attempted (all "succeed") -invalidates - number of invalidates attempted - -A backend implementatation may provide additional metrics. - -FAQ - -1) Where's the value? (Andrew Morton) - -Cleancache provides a significant performance benefit to many workloads -in many environments with negligible overhead by improving the -effectiveness of the pagecache. Clean pagecache pages are -saved in transcendent memory (RAM that is otherwise not directly -addressable to the kernel); fetching those pages later avoids "refaults" -and thus disk reads. - -Cleancache (and its sister code "frontswap") provide interfaces for -this transcendent memory (aka "tmem"), which conceptually lies between -fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. -Disallowing direct kernel or userland reads/writes to tmem -is ideal when data is transformed to a different form and size (such -as with compression) or secretly moved (as might be useful for write- -balancing for some RAM-like devices). Evicted page-cache pages (and -swap pages) are a great use for this kind of slower-than-RAM-but-much- -faster-than-disk transcendent memory, and the cleancache (and frontswap) -"page-object-oriented" specification provides a nice way to read and -write -- and indirectly "name" -- the pages. - -In the virtual case, the whole point of virtualization is to statistically -multiplex physical resources across the varying demands of multiple -virtual machines. This is really hard to do with RAM and efforts to -do it well with no kernel change have essentially failed (except in some -well-publicized special-case workloads). Cleancache -- and frontswap -- -with a fairly small impact on the kernel, provide a huge amount -of flexibility for more dynamic, flexible RAM multiplexing. -Specifically, the Xen Transcendent Memory backend allows otherwise -"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple -virtual machines, but the pages can be compressed and deduplicated to -optimize RAM utilization. And when guest OS's are induced to surrender -underutilized RAM (e.g. with "self-ballooning"), page cache pages -are the first to go, and cleancache allows those pages to be -saved and reclaimed if overall host system memory conditions allow. - -And the identical interface used for cleancache can be used in -physical systems as well. 
The zcache driver acts as a memory-hungry -device that stores pages of data in a compressed state. And -the proposed "RAMster" driver shares RAM across multiple physical -systems. - -2) Why does cleancache have its sticky fingers so deep inside the - filesystems and VFS? (Andrew Morton and Christoph Hellwig) - -The core hooks for cleancache in VFS are in most cases a single line -and the minimum set are placed precisely where needed to maintain -coherency (via cleancache_invalidate operations) between cleancache, -the page cache, and disk. All hooks compile into nothingness if -cleancache is config'ed off and turn into a function-pointer- -compare-to-NULL if config'ed on but no backend claims the ops -functions, or to a compare-struct-element-to-negative if a -backend claims the ops functions but a filesystem doesn't enable -cleancache. - -Some filesystems are built entirely on top of VFS and the hooks -in VFS are sufficient, so don't require an "init_fs" hook; the -initial implementation of cleancache didn't provide this hook. -But for some filesystems (such as btrfs), the VFS hooks are -incomplete and one or more hooks in fs-specific code are required. -And for some other filesystems, such as tmpfs, cleancache may -be counterproductive. So it seemed prudent to require a filesystem -to "opt in" to use cleancache, which requires adding a hook in -each filesystem. Not all filesystems are supported by cleancache -only because they haven't been tested. The existing set should -be sufficient to validate the concept, the opt-in approach means -that untested filesystems are not affected, and the hooks in the -existing filesystems should make it very easy to add more -filesystems in the future. - -The total impact of the hooks to existing fs and mm files is only -about 40 lines added (not counting comments and blank lines). - -3) Why not make cleancache asynchronous and batched so it can - more easily interface with real devices with DMA instead - of copying each individual page? (Minchan Kim) - -The one-page-at-a-time copy semantics simplifies the implementation -on both the frontend and backend and also allows the backend to -do fancy things on-the-fly like page compression and -page deduplication. And since the data is "gone" (copied into/out -of the pageframe) before the cleancache get/put call returns, -a great deal of race conditions and potential coherency issues -are avoided. While the interface seems odd for a "real device" -or for real kernel-addressable RAM, it makes perfect sense for -transcendent memory. - -4) Why is non-shared cleancache "exclusive"? And where is the - page "invalidated" after a "get"? (Minchan Kim) - -The main reason is to free up space in transcendent memory and -to avoid unnecessary cleancache_invalidate calls. If you want inclusive, -the page can be "put" immediately following the "get". If -put-after-get for inclusive becomes common, the interface could -be easily extended to add a "get_no_invalidate" call. - -The invalidate is done by the cleancache backend implementation. - -5) What's the performance impact? - -Performance analysis has been presented at OLS'09 and LCA'10. -Briefly, performance gains can be significant on most workloads, -especially when memory pressure is high (e.g. when RAM is -overcommitted in a virtual workload); and because the hooks are -invoked primarily in place of or in addition to a disk read/write, -overhead is negligible even in worst case workloads. 
Basically -cleancache replaces I/O with memory-copy-CPU-overhead; on older -single-core systems with slow memory-copy speeds, cleancache -has little value, but in newer multicore machines, especially -consolidated/virtualized machines, it has great value. - -6) How do I add cleancache support for filesystem X? (Boaz Harrash) - -Filesystems that are well-behaved and conform to certain -restrictions can utilize cleancache simply by making a call to -cleancache_init_fs at mount time. Unusual, misbehaving, or -poorly layered filesystems must either add additional hooks -and/or undergo extensive additional testing... or should just -not enable the optional cleancache. - -Some points for a filesystem to consider: - -- The FS should be block-device-based (e.g. a ram-based FS such - as tmpfs should not enable cleancache) -- To ensure coherency/correctness, the FS must ensure that all - file removal or truncation operations either go through VFS or - add hooks to do the equivalent cleancache "invalidate" operations -- To ensure coherency/correctness, either inode numbers must - be unique across the lifetime of the on-disk file OR the - FS must provide an "encode_fh" function. -- The FS must call the VFS superblock alloc and deactivate routines - or add hooks to do the equivalent cleancache calls done there. -- To maximize performance, all pages fetched from the FS should - go through the do_mpag_readpage routine or the FS should add - hooks to do the equivalent (cf. btrfs) -- Currently, the FS blocksize must be the same as PAGESIZE. This - is not an architectural restriction, but no backends currently - support anything different. -- A clustered FS should invoke the "shared_init_fs" cleancache - hook to get best performance for some backends. - -7) Why not use the KVA of the inode as the key? (Christoph Hellwig) - -If cleancache would use the inode virtual address instead of -inode/filehandle, the pool id could be eliminated. But, this -won't work because cleancache retains pagecache data pages -persistently even when the inode has been pruned from the -inode unused list, and only invalidates the data page if the file -gets removed/truncated. So if cleancache used the inode kva, -there would be potential coherency issues if/when the inode -kva is reused for a different file. Alternately, if cleancache -invalidated the pages when the inode kva was freed, much of the value -of cleancache would be lost because the cache of pages in cleanache -is potentially much larger than the kernel pagecache and is most -useful if the pages survive inode cache removal. - -8) Why is a global variable required? - -The cleancache_enabled flag is checked in all of the frequently-used -cleancache hooks. The alternative is a function call to check a static -variable. Since cleancache is enabled dynamically at runtime, systems -that don't enable cleancache would suffer thousands (possibly -tens-of-thousands) of unnecessary function calls per second. So the -global variable allows cleancache to be enabled by default at compile -time, but have insignificant performance impact when cleancache remains -disabled at runtime. - -9) Does cleanache work with KVM? - -The memory model of KVM is sufficiently different that a cleancache -backend may have less value for KVM. This remains to be tested, -especially in an overcommitted system. - -10) Does cleancache work in userspace? It sounds useful for - memory hungry caches like web browsers. 
(Jamie Lokier) - -No plans yet, though we agree it sounds useful, at least for -apps that bypass the page cache (e.g. O_DIRECT). - -Last updated: Dan Magenheimer, April 13 2011 diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h deleted file mode 100755 index 04ffb2e6..00000000 --- a/include/linux/cleancache.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _LINUX_CLEANCACHE_H -#define _LINUX_CLEANCACHE_H - -#include -#include -#include - -#define CLEANCACHE_KEY_MAX 6 - -/* - * cleancache requires every file with a page in cleancache to have a - * unique key unless/until the file is removed/truncated. For some - * filesystems, the inode number is unique, but for "modern" filesystems - * an exportable filehandle is required (see exportfs.h) - */ -struct cleancache_filekey { - union { - ino_t ino; - __u32 fh[CLEANCACHE_KEY_MAX]; - u32 key[CLEANCACHE_KEY_MAX]; - } u; -}; - -struct cleancache_ops { - int (*init_fs)(size_t); - int (*init_shared_fs)(char *uuid, size_t); - int (*get_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*put_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*flush_page)(int, struct cleancache_filekey, pgoff_t); - void (*flush_inode)(int, struct cleancache_filekey); - void (*flush_fs)(int); -}; - -extern struct cleancache_ops - cleancache_register_ops(struct cleancache_ops *ops); -extern void __cleancache_init_fs(struct super_block *); -extern void __cleancache_init_shared_fs(char *, struct super_block *); -extern int __cleancache_get_page(struct page *); -extern void __cleancache_put_page(struct page *); -extern void __cleancache_flush_page(struct address_space *, struct page *); -extern void __cleancache_flush_inode(struct address_space *); -extern void __cleancache_flush_fs(struct super_block *); -extern int cleancache_enabled; - -#ifdef CONFIG_CLEANCACHE -static inline bool cleancache_fs_enabled(struct page *page) -{ - return page->mapping->host->i_sb->cleancache_poolid >= 0; -} -static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) -{ - return mapping->host->i_sb->cleancache_poolid >= 0; -} -#else -#define cleancache_enabled (0) -#define cleancache_fs_enabled(_page) (0) -#define cleancache_fs_enabled_mapping(_page) (0) -#endif - -/* - * The shim layer provided by these inline functions allows the compiler - * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE - * is disabled, to a single global variable check if CONFIG_CLEANCACHE - * is enabled but no cleancache "backend" has dynamically enabled it, - * and, for the most frequent cleancache ops, to a single global variable - * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled - * and a cleancache backend has dynamically enabled cleancache, but the - * filesystem referenced by that cleancache op has not enabled cleancache. - * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially - * no measurable performance impact. 
- */ - -static inline void cleancache_init_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_fs(sb); -} - -static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_shared_fs(uuid, sb); -} - -static inline int cleancache_get_page(struct page *page) -{ - int ret = -1; - - if (cleancache_enabled && cleancache_fs_enabled(page)) - ret = __cleancache_get_page(page); - return ret; -} - -static inline void cleancache_put_page(struct page *page) -{ - if (cleancache_enabled && cleancache_fs_enabled(page)) - __cleancache_put_page(page); -} - -static inline void cleancache_flush_page(struct address_space *mapping, - struct page *page) -{ - /* careful... page->mapping is NULL sometimes when this is called */ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_flush_page(mapping, page); -} - -static inline void cleancache_flush_inode(struct address_space *mapping) -{ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_flush_inode(mapping); -} - -static inline void cleancache_flush_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_flush_fs(sb); -} - -#endif /* _LINUX_CLEANCACHE_H */ diff --git a/mm/Kconfig b/mm/Kconfig old mode 100755 new mode 100644 index f86e0d29..2c19c0ba --- a/mm/Kconfig +++ b/mm/Kconfig @@ -288,25 +288,3 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information. -config CLEANCACHE - bool "Enable cleancache driver to cache clean pages if tmem is present" - default n - help - Cleancache can be thought of as a page-granularity victim cache - for clean pages that the kernel's pageframe replacement algorithm - (PFRA) would like to keep around, but can't since there isn't enough - memory. So when the PFRA "evicts" a page, it first attempts to use - cleancacne code to put the data contained in that page into - "transcendent memory", memory that is not directly accessible or - addressable by the kernel and is of unknown and possibly - time-varying size. And when a cleancache-enabled - filesystem wishes to access a page in a file on disk, it first - checks cleancache to see if it already contains it; if it does, - the page is copied into the kernel and a disk access is avoided. - When a transcendent memory driver is available (such as zcache or - Xen transcendent memory), a significant I/O reduction - may be achieved. When none is available, all cleancache calls - are reduced to a single pointer-compare-against-NULL resulting - in a negligible performance hit. - - If unsure, say Y to enable cleancache \ No newline at end of file diff --git a/mm/Makefile b/mm/Makefile old mode 100755 new mode 100644 index 82a734fd..66f54865 --- a/mm/Makefile +++ b/mm/Makefile @@ -46,4 +46,3 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o -obj-$(CONFIG_CLEANCACHE) += cleancache.o diff --git a/mm/cleancache.c b/mm/cleancache.c deleted file mode 100755 index bcaae4c2..00000000 --- a/mm/cleancache.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Cleancache frontend - * - * This code provides the generic "frontend" layer to call a matching - * "backend" driver implementation of cleancache. See - * Documentation/vm/cleancache.txt for more information. - * - * Copyright (C) 2009-2010 Oracle Corp. 
All rights reserved. - * Author: Dan Magenheimer - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ - -#include -#include -#include -#include -#include - -/* - * This global enablement flag may be read thousands of times per second - * by cleancache_get/put/flush even on systems where cleancache_ops - * is not claimed (e.g. cleancache is config'ed on but remains - * disabled), so is preferred to the slower alternative: a function - * call that checks a non-global. - */ -int cleancache_enabled; -EXPORT_SYMBOL(cleancache_enabled); - -/* - * cleancache_ops is set by cleancache_ops_register to contain the pointers - * to the cleancache "backend" implementation functions. - */ -static struct cleancache_ops cleancache_ops; - -/* useful stats available in /sys/kernel/mm/cleancache */ -static unsigned long cleancache_succ_gets; -static unsigned long cleancache_failed_gets; -static unsigned long cleancache_puts; -static unsigned long cleancache_flushes; - -/* - * register operations for cleancache, returning previous thus allowing - * detection of multiple backends and possible nesting - */ -struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops) -{ - struct cleancache_ops old = cleancache_ops; - - cleancache_ops = *ops; - cleancache_enabled = 1; - return old; -} -EXPORT_SYMBOL(cleancache_register_ops); - -/* Called by a cleancache-enabled filesystem at time of mount */ -void __cleancache_init_fs(struct super_block *sb) -{ - sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE); -} -EXPORT_SYMBOL(__cleancache_init_fs); - -/* Called by a cleancache-enabled clustered filesystem at time of mount */ -void __cleancache_init_shared_fs(char *uuid, struct super_block *sb) -{ - sb->cleancache_poolid = - (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE); -} -EXPORT_SYMBOL(__cleancache_init_shared_fs); - -/* - * If the filesystem uses exportable filehandles, use the filehandle as - * the key, else use the inode number. - */ -static int cleancache_get_key(struct inode *inode, - struct cleancache_filekey *key) -{ - int (*fhfn)(struct dentry *, __u32 *fh, int *, int); - int len = 0, maxlen = CLEANCACHE_KEY_MAX; - struct super_block *sb = inode->i_sb; - - key->u.ino = inode->i_ino; - if (sb->s_export_op != NULL) { - fhfn = sb->s_export_op->encode_fh; - if (fhfn) { - struct dentry d; - d.d_inode = inode; - len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); - if (len <= 0 || len == 255) - return -1; - if (maxlen > CLEANCACHE_KEY_MAX) - return -1; - } - } - return 0; -} - -/* - * "Get" data from cleancache associated with the poolid/inode/index - * that were specified when the data was put to cleanache and, if - * successful, use it to fill the specified page with data and return 0. - * The pageframe is unchanged and returns -1 if the get fails. - * Page must be locked by caller. 
- */ -int __cleancache_get_page(struct page *page) -{ - int ret = -1; - int pool_id; - struct cleancache_filekey key = { .u.key = { 0 } }; - - VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id < 0) - goto out; - - if (cleancache_get_key(page->mapping->host, &key) < 0) - goto out; - - ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page); - if (ret == 0) - cleancache_succ_gets++; - else - cleancache_failed_gets++; -out: - return ret; -} -EXPORT_SYMBOL(__cleancache_get_page); - -/* - * "Put" data from a page to cleancache and associate it with the - * (previously-obtained per-filesystem) poolid and the page's, - * inode and page index. Page must be locked. Note that a put_page - * always "succeeds", though a subsequent get_page may succeed or fail. - */ -void __cleancache_put_page(struct page *page) -{ - int pool_id; - struct cleancache_filekey key = { .u.key = { 0 } }; - - VM_BUG_ON(!PageLocked(page)); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id >= 0 && - cleancache_get_key(page->mapping->host, &key) >= 0) { - (*cleancache_ops.put_page)(pool_id, key, page->index, page); - cleancache_puts++; - } -} -EXPORT_SYMBOL(__cleancache_put_page); - -/* - * Flush any data from cleancache associated with the poolid and the - * page's inode and page index so that a subsequent "get" will fail. - */ -void __cleancache_flush_page(struct address_space *mapping, struct page *page) -{ - /* careful... page->mapping is NULL sometimes when this is called */ - int pool_id = mapping->host->i_sb->cleancache_poolid; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (pool_id >= 0) { - VM_BUG_ON(!PageLocked(page)); - if (cleancache_get_key(mapping->host, &key) >= 0) { - (*cleancache_ops.flush_page)(pool_id, key, page->index); - cleancache_flushes++; - } - } -} -EXPORT_SYMBOL(__cleancache_flush_page); - -/* - * Flush all data from cleancache associated with the poolid and the - * mappings's inode so that all subsequent gets to this poolid/inode - * will fail. 
- */ -void __cleancache_flush_inode(struct address_space *mapping) -{ - int pool_id = mapping->host->i_sb->cleancache_poolid; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - (*cleancache_ops.flush_inode)(pool_id, key); -} -EXPORT_SYMBOL(__cleancache_flush_inode); - -/* - * Called by any cleancache-enabled filesystem at time of unmount; - * note that pool_id is surrendered and may be reutrned by a subsequent - * cleancache_init_fs or cleancache_init_shared_fs - */ -void __cleancache_flush_fs(struct super_block *sb) -{ - if (sb->cleancache_poolid >= 0) { - int old_poolid = sb->cleancache_poolid; - sb->cleancache_poolid = -1; - (*cleancache_ops.flush_fs)(old_poolid); - } -} -EXPORT_SYMBOL(__cleancache_flush_fs); - -#ifdef CONFIG_SYSFS - -/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */ - -#define CLEANCACHE_SYSFS_RO(_name) \ - static ssize_t cleancache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%lu\n", cleancache_##_name); \ - } \ - static struct kobj_attribute cleancache_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = cleancache_##_name##_show, \ - } - -CLEANCACHE_SYSFS_RO(succ_gets); -CLEANCACHE_SYSFS_RO(failed_gets); -CLEANCACHE_SYSFS_RO(puts); -CLEANCACHE_SYSFS_RO(flushes); - -static struct attribute *cleancache_attrs[] = { - &cleancache_succ_gets_attr.attr, - &cleancache_failed_gets_attr.attr, - &cleancache_puts_attr.attr, - &cleancache_flushes_attr.attr, - NULL, -}; - -static struct attribute_group cleancache_attr_group = { - .attrs = cleancache_attrs, - .name = "cleancache", -}; - -#endif /* CONFIG_SYSFS */ - -static int __init init_cleancache(void) -{ -#ifdef CONFIG_SYSFS - int err; - - err = sysfs_create_group(mm_kobj, &cleancache_attr_group); -#endif /* CONFIG_SYSFS */ - return 0; -} -module_init(init_cleancache) From 04d584c63411b8f490013822de2178695ac390ff Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 00:00:26 +0800 Subject: [PATCH 116/155] drivers/mmc/host/msm-sdcc: remove dead config options CONFIG_MMC_MSM7X00A_RESUME_IN_WQ and CONFIG_MMC_EMBEDDED_SDIO don't exist in Kconfig and is never defined anywhere else, therefore removing all references for it from the source code. 
--- drivers/mmc/host/msm_sdcc.c | 36 +----------------------------------- drivers/mmc/host/msm_sdcc.h | 3 --- 2 files changed, 1 insertion(+), 38 deletions(-) mode change 100644 => 100755 drivers/mmc/host/msm_sdcc.c mode change 100644 => 100755 drivers/mmc/host/msm_sdcc.h diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c old mode 100644 new mode 100755 index 806dcad0..1f8cd0ee --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -1265,24 +1265,6 @@ msmsdcc_init_dma(struct msmsdcc_host *host) return 0; } -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ -static void -do_resume_work(struct work_struct *work) -{ - struct msmsdcc_host *host = - container_of(work, struct msmsdcc_host, resume_task); - struct mmc_host *mmc = host->mmc; - - if (mmc) { - mmc_resume_host(mmc); - if (host->stat_irq) - enable_irq(host->stat_irq); - } -} - -#endif - - #ifdef CONFIG_HAS_EARLYSUSPEND static void msmsdcc_early_suspend(struct early_suspend *h) { @@ -1382,14 +1364,6 @@ msmsdcc_probe(struct platform_device *pdev) host->dmares = dmares; spin_lock_init(&host->lock); -#ifdef CONFIG_MMC_EMBEDDED_SDIO - if (plat->embedded_sdio) - mmc_set_embedded_sdio_data(mmc, - &plat->embedded_sdio->cis, - &plat->embedded_sdio->cccr, - plat->embedded_sdio->funcs, - plat->embedded_sdio->num_funcs); -#endif /* * Setup DMA @@ -1608,22 +1582,14 @@ msmsdcc_resume(struct platform_device *dev) msmsdcc_writel(host, host->saved_irq0mask, MMCIMASK0); - if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) { -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - schedule_work(&host->resume_task); -#else + if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) mmc_resume_host(mmc); -#endif - } - if (host->stat_irq) enable_irq(host->stat_irq); - #if BUSCLK_PWRSAVE if (host->clks_on) msmsdcc_disable_clocks(host, 1); #endif - } return 0; } diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h old mode 100644 new mode 100755 index fdb0b9c6..65c4a3e5 --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -258,9 +258,6 @@ struct msmsdcc_host { int polling_enabled; #endif -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - struct work_struct resume_task; -#endif struct tasklet_struct dma_tlet; From 81d981629c93026be48ffc126d36078216cc05ee Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 00:10:19 +0800 Subject: [PATCH 117/155] mmc: msm_sdcc: Fix possible circular locking dependency warning In the context of request processing thread, data mover lock is acquired after the host lock. In another context, in the completion handler of data mover the locks are acquired in the reverse order, resulting in possible circular lock dependency warning. Hence, schedule a tasklet to process the dma completion so as to avoid nested locks. 
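As a rough illustration of the locking change described above, the following minimal sketch (hypothetical code, not the driver's; the my_host/my_dma_done/my_dma_tlet_fn names are invented for this example) shows the general pattern of deferring DMA completion to a tasklet so that the host lock is never taken while the data mover's own lock is still held:

#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct my_host {
	spinlock_t              lock;        /* host lock */
	unsigned int            dma_result;  /* saved by the DMA callback */
	struct tasklet_struct   dma_tlet;    /* completion runs here later */
};

/*
 * DMA engine callback: may run with the data mover's internal lock held,
 * so it must not take host->lock here.  It only records the result and
 * schedules the tasklet.
 */
static void my_dma_done(struct my_host *host, unsigned int result)
{
	host->dma_result = result;
	tasklet_schedule(&host->dma_tlet);
}

/*
 * Tasklet body: runs later in softirq context, outside the data mover's
 * lock, so taking the host lock here cannot produce the AB/BA ordering
 * that lockdep warned about.
 */
static void my_dma_tlet_fn(unsigned long data)
{
	struct my_host *host = (struct my_host *)data;
	unsigned long flags;

	spin_lock_irqsave(&host->lock, flags);
	/* ... finish the request using host->dma_result ... */
	spin_unlock_irqrestore(&host->lock, flags);
}

/* Set up once at probe time, next to spin_lock_init(). */
static void my_host_init(struct my_host *host)
{
	spin_lock_init(&host->lock);
	tasklet_init(&host->dma_tlet, my_dma_tlet_fn, (unsigned long)host);
}

The driver change below follows the same shape: msmsdcc_dma_complete_func() now only saves the result and error data and schedules host->dma_tlet, and msmsdcc_dma_complete_tlet() performs the completion work under the host lock.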
--- drivers/mmc/host/msm_sdcc.c | 46 +++++++++++++++++++++++++------------ drivers/mmc/host/msm_sdcc.h | 2 +- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index 1f8cd0ee..c3193db6 100755 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -308,42 +308,40 @@ msmsdcc_dma_exec_func(struct msm_dmov_cmd *cmd) } static void -msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, - unsigned int result, - struct msm_dmov_errdata *err) +msmsdcc_dma_complete_tlet(unsigned long data) { - struct msmsdcc_dma_data *dma_data = - container_of(cmd, struct msmsdcc_dma_data, hdr); - struct msmsdcc_host *host = dma_data->host; + struct msmsdcc_host *host = (struct msmsdcc_host *)data; unsigned long flags; struct mmc_request *mrq; + struct msm_dmov_errdata err; spin_lock_irqsave(&host->lock, flags); host->dma.active = 0; + err = host->dma.err; mrq = host->curr.mrq; BUG_ON(!mrq); WARN_ON(!mrq->data); - if (!(result & DMOV_RSLT_VALID)) { + if (!(host->dma.result & DMOV_RSLT_VALID)) { pr_err("msmsdcc: Invalid DataMover result\n"); goto out; } - if (result & DMOV_RSLT_DONE) { + if (host->dma.result & DMOV_RSLT_DONE) { host->curr.data_xfered = host->curr.xfer_size; } else { /* Error or flush */ - if (result & DMOV_RSLT_ERROR) + if (host->dma.result & DMOV_RSLT_ERROR) pr_err("%s: DMA error (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (result & DMOV_RSLT_FLUSH) + mmc_hostname(host->mmc), host->dma.result); + if (host->dma.result & DMOV_RSLT_FLUSH) pr_err("%s: DMA channel flushed (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (err) + mmc_hostname(host->mmc), host->dma.result); + pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n", - err->flush[0], err->flush[1], err->flush[2], - err->flush[3], err->flush[4], err->flush[5]); + err.flush[0], err.flush[1], err.flush[2], + err.flush[3], err.flush[4], err.flush[5]); if (!mrq->data->error) mrq->data->error = -EIO; } @@ -391,6 +389,22 @@ out: return; } +static void +msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, + unsigned int result, + struct msm_dmov_errdata *err) +{ + struct msmsdcc_dma_data *dma_data = + container_of(cmd, struct msmsdcc_dma_data, hdr); + struct msmsdcc_host *host = dma_data->host; + + dma_data->result = result; + if (err) + memcpy(&dma_data->err, err, sizeof(struct msm_dmov_errdata)); + + tasklet_schedule(&host->dma_tlet); +} + static int validate_dma(struct msmsdcc_host *host, struct mmc_data *data) { if (host->dma.channel == -1) @@ -1364,6 +1378,8 @@ msmsdcc_probe(struct platform_device *pdev) host->dmares = dmares; spin_lock_init(&host->lock); + tasklet_init(&host->dma_tlet, msmsdcc_dma_complete_tlet, + (unsigned long)host); /* * Setup DMA diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h index 65c4a3e5..16504610 100755 --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -190,7 +190,7 @@ struct msmsdcc_dma_data { int busy; /* Set if DM is busy */ int active; unsigned int result; - struct msm_dmov_errdata *err; + struct msm_dmov_errdata err; }; struct msmsdcc_pio_data { From 29e0f0df9d761946e1776b7b2f15eae475211155 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 00:34:36 +0800 Subject: [PATCH 118/155] mmc: msm_sdcc: Add prog done interrupt support Enable prog done interrupt for stop command(CMD12) that is sent after a multi-block write(CMD25). The PROG_DONE bit is set when the card has finished its programming and is ready for next data. 
After every write request the card will be polled for ready status using CMD13. For a multi-block write(CMD25) before sending CMD13, stop command (CMD12) will be sent. If we enable prog done interrupt for CMD12, then CMD13 polling can be avoided. The prog done interrupt means that the card is done with its programming and is ready for next request. --- drivers/mmc/host/msm_sdcc.c | 26 ++++++++++++++++++++++++-- drivers/mmc/host/msm_sdcc.h | 6 +++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index c3193db6..c022cc2f 100755 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -556,6 +556,11 @@ msmsdcc_start_command_deferred(struct msmsdcc_host *host, (cmd->opcode == 53)) *c |= MCI_CSPM_DATCMD; + if (host->prog_scan && (cmd->opcode == 12)) { + *c |= MCI_CPSM_PROGENA; + host->prog_enable = true; + } + if (cmd == cmd->mrq->stop) *c |= MCI_CSPM_MCIABORT; @@ -626,6 +631,8 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data, } dsb(); msm_dmov_enqueue_cmd_ext(host->dma.channel, &host->dma.hdr); + if (data->flags & MMC_DATA_WRITE) + host->prog_scan = true; } else { msmsdcc_writel(host, timeout, MMCIDATATIMER); @@ -920,8 +927,23 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status) else if (host->curr.data) { /* Non DMA */ msmsdcc_stop_data(host); msmsdcc_request_end(host, cmd->mrq); - } else /* host->data == NULL */ + } else { /* host->data == NULL */ + if (!cmd->error && host->prog_enable) { + if (status & MCI_PROGDONE) { + host->prog_scan = false; + host->prog_enable = false; msmsdcc_request_end(host, cmd->mrq); + } else { + host->curr.cmd = cmd; + } + } else { + if (host->prog_enable) { + host->prog_scan = false; + host->prog_enable = false; + } + msmsdcc_request_end(host, cmd->mrq); + } + } } else if (cmd->data) if (!(cmd->data->flags & MMC_DATA_READ)) msmsdcc_start_data(host, cmd->data, @@ -935,7 +957,7 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status, struct mmc_data *data = host->curr.data; if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL | - MCI_CMDTIMEOUT) && host->curr.cmd) { + MCI_CMDTIMEOUT | MCI_PROGDONE) && host->curr.cmd) { msmsdcc_do_cmdirq(host, status); } diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h index 16504610..78d8b1ac 100755 --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -155,7 +155,7 @@ #define MCI_IRQENABLE \ (MCI_CMDCRCFAILMASK|MCI_DATACRCFAILMASK|MCI_CMDTIMEOUTMASK| \ MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK| \ - MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK) + MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK|MCI_PROGDONEMASK) /* * The size of the FIFO in bytes. @@ -264,8 +264,6 @@ struct msmsdcc_host { #ifdef CONFIG_MMC_AUTO_SUSPEND unsigned long suspended; #endif - unsigned int prog_scan; - unsigned int prog_enable; /* Command parameters */ unsigned int cmd_timeout; unsigned int cmd_pio_irqmask; @@ -276,6 +274,8 @@ struct msmsdcc_host { unsigned int dummy_52_needed; unsigned int dummy_52_state; + bool prog_scan; + bool prog_enable; }; #endif From 08802ee5be4234c1fd62a6cf1bcd6c5f8c4216b6 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 00:43:40 +0800 Subject: [PATCH 119/155] mmc: msm_sdcc: Fix bug in PIO mode when data size is not word aligned The current code for PIO doesn't transfer whole data when data size is not in multiple of 4 bytes. 
The last few bytes are not written to the card resulting in no DATAEND interrupt from SDCC. This patch allows data transfer for non-aligned data size in PIO mode. --- drivers/mmc/host/msm_sdcc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index c022cc2f..ed4d6f13 100755 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -722,6 +722,9 @@ msmsdcc_pio_read(struct msmsdcc_host *host, char *buffer, unsigned int remain) count += remain; }else #endif + if (remain % 4) + remain = ((remain >> 2) + 1) << 2; + while (msmsdcc_readl(host, MMCISTATUS) & MCI_RXDATAAVLBL) { *ptr = msmsdcc_readl(host, MMCIFIFO + (count % MCI_FIFOSIZE)); ptr++; @@ -758,13 +761,14 @@ msmsdcc_pio_write(struct msmsdcc_host *host, char *buffer, } else { #endif do { - unsigned int count, maxcnt; + unsigned int count, maxcnt, sz; maxcnt = status & MCI_TXFIFOEMPTY ? MCI_FIFOSIZE : MCI_FIFOHALFSIZE; count = min(remain, maxcnt); - writesl(base + MMCIFIFO, ptr, count >> 2); + sz = count % 4 ? (count >> 2) + 1 : (count >> 2); + writesl(base + MMCIFIFO, ptr, sz); ptr += count; remain -= count; From 4eade398cdd8ce0f9b1e715fe49e6d86a2a18982 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 01:05:06 +0800 Subject: [PATCH 120/155] mmc: msm: fix dma usage not to use internal APIs Remove parts of this driver which use internal API calls. This replaces the calls as suggested by Russell King. --- drivers/mmc/host/msm_sdcc.c | 49 +++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index ed4d6f13..e22fc1cb 100755 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -465,14 +465,30 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) host->curr.user_pages = 0; box = &nc->cmd[0]; - for (i = 0; i < host->dma.num_ents; i++) { + + /* location of command block must be 64 bit aligned */ + BUG_ON(host->dma.cmd_busaddr & 0x07); + + nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; + host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | + DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); + host->dma.hdr.complete_func = msmsdcc_dma_complete_func; + + n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, + host->dma.num_ents, host->dma.dir); + if (n == 0) { + printk(KERN_ERR "%s: Unable to map in all sg elements\n", + mmc_hostname(host->mmc)); + host->dma.sg = NULL; + host->dma.num_ents = 0; + return -ENOMEM; + } + + for_each_sg(host->dma.sg, sg, n, i) { + box->cmd = CMD_MODE_BOX; - /* Initialize sg dma address */ - sg->dma_address = page_to_dma(mmc_dev(host->mmc), sg_page(sg)) - + sg->offset; - - if (i == (host->dma.num_ents - 1)) + if (i == n - 1) box->cmd |= CMD_LC; rows = (sg_dma_len(sg) % MCI_FIFOSIZE) ? 
(sg_dma_len(sg) / MCI_FIFOSIZE) + 1 : @@ -500,27 +516,6 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) box->cmd |= CMD_DST_CRCI(crci); } box++; - sg++; - } - - /* location of command block must be 64 bit aligned */ - BUG_ON(host->dma.cmd_busaddr & 0x07); - - nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; - host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | - DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); - host->dma.hdr.complete_func = msmsdcc_dma_complete_func; - - n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, - host->dma.num_ents, host->dma.dir); -/* dsb inside dma_map_sg will write nc out to mem as well */ - - if (n != host->dma.num_ents) { - printk(KERN_ERR "%s: Unable to map in all sg elements\n", - mmc_hostname(host->mmc)); - host->dma.sg = NULL; - host->dma.num_ents = 0; - return -ENOMEM; } return 0; From 2145485d7d4f0d9cda8f51725269b0421ebac734 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 02:45:27 +0800 Subject: [PATCH 121/155] mmc: change clock from 50Mhz to 64Mhz PCLK mmc: change NR_SG from 32 to 128 for better performance --- drivers/mmc/host/msm_sdcc.c | 2 +- drivers/mmc/host/msm_sdcc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c index e22fc1cb..4fef37f7 100755 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -73,7 +73,7 @@ static int msmsdcc_auto_suspend(struct mmc_host *, int); #define BUSCLK_TIMEOUT (HZ) #define SQN_BUSCLK_TIMEOUT (5 * HZ) static unsigned int msmsdcc_fmin = 144000; -static unsigned int msmsdcc_fmax = 50000000; +static unsigned int msmsdcc_fmax = 64000000; static unsigned int msmsdcc_4bit = 1; static unsigned int msmsdcc_pwrsave = 1; static unsigned int msmsdcc_piopoll = 1; diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h index 78d8b1ac..1368fc0c 100755 --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -164,7 +164,7 @@ #define MCI_FIFOHALFSIZE (MCI_FIFOSIZE / 2) -#define NR_SG 32 +#define NR_SG 128 struct clk; From 4b10fc19b9dfa387ee0744a968503726b2d4231e Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 23 Aug 2012 12:39:53 +0800 Subject: [PATCH 122/155] staging: Add Snappy compression support to zram Zram currently uses LZO compression. With Snappy, it uses less CPU time and is thus more useful. The sacrifice in compression ratio is small. Zram's LZO and Snappy support can be independently enabled at compile time and each zram device can switch between compression methods when unused. When only a single compression method was enabled at compile time, no idirection penalty is incurred. 
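The compile-time selection works roughly as sketched below. This is a simplified illustration of the ops-table idea used by the zram hunks later in this patch; the struct fields follow the patch, but the signatures are abbreviated and the explicit zram macro argument is an assumption made here for readability (the real macros pick it up from the enclosing function):

#include <linux/types.h>
#include <linux/lzo.h>

struct zram_compressor {
	const char *name;
	int (*compress)(const unsigned char *src, size_t src_len,
			unsigned char *dst, size_t *dst_len, void *workmem);
	int (*decompress)(const unsigned char *src, size_t src_len,
			  unsigned char *dst, size_t *dst_len);
	unsigned int workmem_bytes;	/* scratch memory the backend needs */
};

#ifdef MULTIPLE_COMPRESSORS
/* More than one backend built in: dispatch through the per-device table. */
#define COMPRESS(zram, s, sl, d, dl, wm) \
	((zram)->compressor->compress((s), (sl), (d), (dl), (wm)))
#else
/* Exactly one backend built in (LZO shown here): direct call, no indirection. */
#define COMPRESS(zram, s, sl, d, dl, wm) \
	lzo1x_1_compress((s), (sl), (d), (dl), (wm))
#endif

When both backends are enabled, an unused device can be switched at run time through the compressor sysfs attribute added in zram_sysfs.c below.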
http://driverdev.linuxdriverproject.org/pipermail/devel/2011-April/015114.html --- arch/arm/configs/htcleo_defconfig | 4 + drivers/staging/Kconfig | 2 + drivers/staging/Makefile | 3 +- drivers/staging/snappy/Kconfig | 5 + drivers/staging/snappy/Makefile | 5 + drivers/staging/snappy/csnappy.h | 125 +++++ drivers/staging/snappy/csnappy_compress.c | 497 ++++++++++++++++++++ drivers/staging/snappy/csnappy_decompress.c | 321 +++++++++++++ drivers/staging/snappy/csnappy_internal.h | 83 ++++ drivers/staging/zram/Kconfig | 14 +- drivers/staging/zram/zram_drv.c | 101 +++- drivers/staging/zram/zram_drv.h | 30 ++ drivers/staging/zram/zram_sysfs.c | 58 +++ 13 files changed, 1233 insertions(+), 15 deletions(-) mode change 100644 => 100755 drivers/staging/Kconfig mode change 100644 => 100755 drivers/staging/Makefile create mode 100755 drivers/staging/snappy/Kconfig create mode 100755 drivers/staging/snappy/Makefile create mode 100755 drivers/staging/snappy/csnappy.h create mode 100755 drivers/staging/snappy/csnappy_compress.c create mode 100755 drivers/staging/snappy/csnappy_decompress.c create mode 100755 drivers/staging/snappy/csnappy_internal.h diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 12f408f2..8d26b82b 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1697,6 +1697,10 @@ CONFIG_ZRAM_NUM_DEVICES=1 CONFIG_ZRAM_DEFAULT_PERCENTAGE=18 # CONFIG_ZRAM_DEBUG is not set CONFIG_ZRAM_DEFAULT_DISKSIZE=100000000 +# CONFIG_ZRAM_LZO is not set +CONFIG_ZRAM_SNAPPY=y +CONFIG_SNAPPY_COMPRESS=y +CONFIG_SNAPPY_DECOMPRESS=y # # File systems diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig old mode 100644 new mode 100755 index 8ee4bfa6..b8964347 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,5 +125,7 @@ source "drivers/staging/iio/Kconfig" source "drivers/staging/zram/Kconfig" +source "drivers/staging/snappy/Kconfig" + endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile old mode 100644 new mode 100755 index 5a1b7341..621dc916 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -45,4 +45,5 @@ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_XVMALLOC) += zram/ - +obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/ +obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/ diff --git a/drivers/staging/snappy/Kconfig b/drivers/staging/snappy/Kconfig new file mode 100755 index 00000000..24f69085 --- /dev/null +++ b/drivers/staging/snappy/Kconfig @@ -0,0 +1,5 @@ +config SNAPPY_COMPRESS + tristate "Google Snappy Compression" + +config SNAPPY_DECOMPRESS + tristate "Google Snappy Decompression" diff --git a/drivers/staging/snappy/Makefile b/drivers/staging/snappy/Makefile new file mode 100755 index 00000000..399d070a --- /dev/null +++ b/drivers/staging/snappy/Makefile @@ -0,0 +1,5 @@ +snappy_compress-objs := csnappy_compress.o +snappy_decompress-objs := csnappy_decompress.o + +obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o +obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o diff --git a/drivers/staging/snappy/csnappy.h b/drivers/staging/snappy/csnappy.h new file mode 100755 index 00000000..1e0a54ea --- /dev/null +++ b/drivers/staging/snappy/csnappy.h @@ -0,0 +1,125 @@ +#ifndef __CSNAPPY_H__ +#define __CSNAPPY_H__ +/* +File modified for the Linux Kernel by +Zeev Tarantov +*/ +#ifdef __cplusplus +extern "C" { +#endif + +#define CSNAPPY_VERSION 4 + +#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15 +#define 
CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) + +/* + * Returns the maximal size of the compressed representation of + * input data that is "source_len" bytes in length; + */ +uint32_t +csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); + +/* + * Flat array compression that does not emit the "uncompressed length" + * prefix. Compresses "input" array to the "output" array. + * + * REQUIRES: "input" is at most 32KiB long. + * REQUIRES: "output" points to an array of memory that is at least + * "csnappy_max_compressed_length(input_length)" in size. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Returns an "end" pointer into "output" buffer. + * "end - output" is the compressed size of "input". + */ +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_length, + char *output, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * REQUIRES: "compressed" must point to an area of memory that is at + * least "csnappy_max_compressed_length(input_length)" bytes in length. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Takes the data stored in "input[0..input_length]" and stores + * it in the array pointed to by "compressed". + * + * "*out_compressed_length" is set to the length of the compressed output. + */ +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *out_compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * Reads header of compressed data to get stored length of uncompressed data. + * REQUIRES: start points to compressed data. + * REQUIRES: n is length of available compressed data. + * + * Returns SNAPPY_E_HEADER_BAD on error. + * Returns number of bytes read from input on success. + * Stores decoded length into *result. + */ +int +csnappy_get_uncompressed_length( + const char *start, + uint32_t n, + uint32_t *result); + +/* + * Safely decompresses all data from array "src" of length "src_len" containing + * entire compressed stream (with header) into array "dst" of size "dst_len". + * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). + * + * Iff sucessful, returns CSNAPPY_E_OK. + * If recorded length in header is greater than dst_len, returns + * CSNAPPY_E_OUTPUT_INSUF. + * If compressed data is malformed, does not write more than dst_len into dst. + */ +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len); + +/* + * Safely decompresses stream src_len bytes long read from src to dst. + * Amount of available space at dst must be provided in *dst_len by caller. + * If compressed stream needs more space, it will not overflow and return + * CSNAPPY_E_OUTPUT_OVERRUN. + * On success, sets *dst_len to actal number of bytes decompressed. + * Iff sucessful, returns CSNAPPY_E_OK. 
+ */ +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_len, + char *dst, + uint32_t *dst_len); + +/* + * Return values (< 0 = Error) + */ +#define CSNAPPY_E_OK 0 +#define CSNAPPY_E_HEADER_BAD (-1) +#define CSNAPPY_E_OUTPUT_INSUF (-2) +#define CSNAPPY_E_OUTPUT_OVERRUN (-3) +#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) +#define CSNAPPY_E_DATA_MALFORMED (-5) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/staging/snappy/csnappy_compress.c b/drivers/staging/snappy/csnappy_compress.c new file mode 100755 index 00000000..7344f772 --- /dev/null +++ b/drivers/staging/snappy/csnappy_compress.c @@ -0,0 +1,497 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +static inline char* +encode_varint32(char *sptr, uint32_t v) +{ + uint8_t* ptr = (uint8_t *)sptr; + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return (char *)ptr; +} + + +/* + * Any hash function will produce a valid compressed bitstream, but a good + * hash function reduces the number of collisions and thus yields better + * compression for compressible input, and more speed for incompressible + * input. Of course, it doesn't hurt if the hash function is reasonably fast + * either, as it gets called a lot. 
+ */ +static inline uint32_t HashBytes(uint32_t bytes, int shift) +{ + uint32_t kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32_t Hash(const char *p, int shift) +{ + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + + +/* + * *** DO NOT CHANGE THE VALUE OF kBlockSize *** + + * New Compression code chops up the input into blocks of at most + * the following size. This ensures that back-references in the + * output never cross kBlockSize block boundaries. This can be + * helpful in implementing blocked decompression. However the + * decompression code should not rely on this guarantee since older + * compression code may not obey it. + */ +#define kBlockLog 15 +#define kBlockSize (1 << kBlockLog) + + +/* + * Return the largest n such that + * + * s1[0,n-1] == s2[0,n-1] + * and n <= (s2_limit - s2). + * + * Does not read *s2_limit or beyond. + * Does not read *(s1 + (s2_limit - s2)) or beyond. + * Requires that s2_limit >= s2. + * + * Separate implementation for x86_64, for speed. Uses the fact that + * x86_64 is little endian. + */ +#if defined(__x86_64__) +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + uint64_t x; + int matched, matching_bits; + DCHECK_GE(s2_limit, s2); + matched = 0; + /* + * Find out how long the match is. We loop over the data 64 bits at a + * time until we find a 64-bit block that doesn't match; then we find + * the first non-matching bit and use that to calculate the total + * length of the match. + */ + while (likely(s2 <= s2_limit - 8)) { + if (unlikely(UNALIGNED_LOAD64(s1 + matched) == + UNALIGNED_LOAD64(s2))) { + s2 += 8; + matched += 8; + } else { + /* + * On current (mid-2008) Opteron models there is a 3% + * more efficient code sequence to find the first + * non-matching byte. However, what follows is ~10% + * better on Intel Core 2 and newer, and we expect AMD's + * bsf instruction to improve. + */ + x = UNALIGNED_LOAD64(s1 + matched) ^ + UNALIGNED_LOAD64(s2); + matching_bits = FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (likely(s2 < s2_limit)) { + if (likely(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else /* !defined(__x86_64__) */ +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + /* Implementation based on the x86-64 version, above. */ + int matched = 0; + DCHECK_GE(s2_limit, s2); + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } +#if defined(__LITTLE_ENDIAN) + if (s2 <= s2_limit - 4) { + uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ + UNALIGNED_LOAD32(s2); + int matching_bits = FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } +#else + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } +#endif + return matched; +} +#endif /* !defined(__x86_64__) */ + + +static inline char* +EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) +{ + int n = len - 1; /* Zero-length literals are disallowed */ + if (n < 60) { + /* Fits in tag byte */ + *op++ = LITERAL | (n << 2); + /* + The vast majority of copies are below 16 bytes, for which a + call to memcpy is overkill. 
This fast path can sometimes + copy up to 15 bytes too much, but that is okay in the + main loop, since we have a bit to go on for both sides: + - The input will always have kInputMarginBytes = 15 extra + available bytes, as long as we're in the main loop, and + if not, allow_fast_path = false. + - The output will always have 32 spare bytes (see + snappy_max_compressed_length). + */ + if (allow_fast_path && len <= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal)); + UNALIGNED_STORE64(op + 8, + UNALIGNED_LOAD64(literal + 8)); + return op + len; + } + } else { + /* Encode in upcoming bytes */ + char *base = op; + int count = 0; + op++; + while (n > 0) { + *op++ = n & 0xff; + n >>= 8; + count++; + } + DCHECK_GE(count, 1); + DCHECK_LE(count, 4); + *base = LITERAL | ((59+count) << 2); + } + memcpy(op, literal, len); + return op + len; +} + +static inline char* +EmitCopyLessThan64(char *op, int offset, int len) +{ + DCHECK_LE(len, 64); + DCHECK_GE(len, 4); + DCHECK_LT(offset, 65536); + + if ((len < 12) && (offset < 2048)) { + int len_minus_4 = len - 4; + DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ + *op++ = COPY_1_BYTE_OFFSET | + ((len_minus_4) << 2) | + ((offset >> 8) << 5); + *op++ = offset & 0xff; + } else { + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); + put_unaligned_le16(offset, op); + op += 2; + } + return op; +} + +static inline char* +EmitCopy(char *op, int offset, int len) +{ + /* Emit 64 byte copies but make sure to keep at least four bytes + * reserved */ + while (len >= 68) { + op = EmitCopyLessThan64(op, offset, 64); + len -= 64; + } + + /* Emit an extra 60 byte copy if have too much data to fit in one + * copy */ + if (len > 64) { + op = EmitCopyLessThan64(op, offset, 60); + len -= 60; + } + + /* Emit remainder */ + op = EmitCopyLessThan64(op, offset, len); + return op; +} + + +/* + * For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will + * equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have + * empirically found that overlapping loads such as + * UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) + * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32_t. + */ +static inline uint32_t +GetUint32AtOffset(uint64_t v, int offset) +{ + DCHECK(0 <= offset && offset <= 4); +#ifdef __LITTLE_ENDIAN + return v >> (8 * offset); +#else + return v >> (32 - 8 * offset); +#endif +} + +#define kInputMarginBytes 15 +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_size, + char *op, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, + *candidate, *base; + uint16_t *table = (uint16_t *)working_memory; + uint64_t input_bytes; + uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; + int shift, matched; + + DCHECK_GE(workmem_bytes_power_of_two, 9); + DCHECK_LE(workmem_bytes_power_of_two, 15); + /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. + * How many bits of 32bit hash function result are discarded? */ + shift = 33 - workmem_bytes_power_of_two; + /* "ip" is the input pointer, and "op" is the output pointer. */ + ip = input; + DCHECK_LE(input_size, kBlockSize); + ip_end = input + input_size; + base_ip = ip; + /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or + [next_emit, ip_end) after the main loop. 
*/ + next_emit = ip; + + if (unlikely(input_size < kInputMarginBytes)) + goto emit_remainder; + + memset(working_memory, 0, 1 << workmem_bytes_power_of_two); + + ip_limit = input + input_size - kInputMarginBytes; + next_hash = Hash(++ip, shift); + +main_loop: + DCHECK_LT(next_emit, ip); + /* + * The body of this loop calls EmitLiteral once and then EmitCopy one or + * more times. (The exception is that when we're close to exhausting + * the input we goto emit_remainder.) + * + * In the first iteration of this loop we're just starting, so + * there's nothing to copy, so calling EmitLiteral once is + * necessary. And we only start a new iteration when the + * current iteration has determined that a call to EmitLiteral will + * precede the next call to EmitCopy (if any). + * + * Step 1: Scan forward in the input looking for a 4-byte-long match. + * If we get close to exhausting the input then goto emit_remainder. + * + * Heuristic match skipping: If 32 bytes are scanned with no matches + * found, start looking only at every other byte. If 32 more bytes are + * scanned, look at every third byte, etc.. When a match is found, + * immediately go back to looking at every byte. This is a small loss + * (~5% performance, ~0.1% density) for compressible data due to more + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge + * win since the compressor quickly "realizes" the data is incompressible + * and doesn't bother looking for matches everywhere. + * + * The "skip" variable keeps track of how many bytes there are since the + * last match; dividing it by 32 (ie. right-shifting by five) gives the + * number of bytes to move ahead for each iteration. + */ + skip = 32; + + next_ip = ip; + do { + ip = next_ip; + hash = next_hash; + DCHECK_EQ(hash, Hash(ip, shift)); + next_ip = ip + (skip++ >> 5); + if (unlikely(next_ip > ip_limit)) + goto emit_remainder; + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + DCHECK_GE(candidate, base_ip); + DCHECK_LT(candidate, ip); + + table[hash] = ip - base_ip; + } while (likely(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + + /* + * Step 2: A 4-byte match has been found. We'll later see if more + * than 4 bytes match. But, prior to the match, input + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + */ + DCHECK_LE(next_emit + 16, ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit, 1); + + /* + * Step 3: Call EmitCopy, and then see if another EmitCopy could + * be our next move. Repeat until we find no match for the + * input immediately after what was consumed by the last EmitCopy call. + * + * If we exit this loop normally then we need to call EmitLiteral next, + * though we don't yet know how big the literal will be. We handle that + * by proceeding to the next iteration of the main loop. We also can exit + * this loop via goto if we get close to exhausting the input. + */ + input_bytes = 0; + candidate_bytes = 0; + + do { + /* We have a 4-byte match at ip, and no need to emit any + "literal bytes" prior to ip. */ + base = ip; + matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); + ip += matched; + DCHECK_EQ(0, memcmp(base, candidate, matched)); + op = EmitCopy(op, base - candidate, matched); + /* We could immediately start working at ip now, but to improve + compression we first update table[Hash(ip - 1, ...)]. 
*/ + next_emit = ip; + if (unlikely(ip >= ip_limit)) + goto emit_remainder; + input_bytes = UNALIGNED_LOAD64(ip - 1); + prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + goto main_loop; + +emit_remainder: + /* Emit the remaining bytes as a literal */ + if (next_emit < ip_end) + op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); + + return op; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress_fragment); +#endif + +uint32_t __attribute__((const)) +csnappy_max_compressed_length(uint32_t source_len) +{ + return 32 + source_len + source_len/6; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_max_compressed_length); +#endif + +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + int workmem_size; + int num_to_read; + uint32_t written = 0; + char *p = encode_varint32(compressed, input_length); + written += (p - compressed); + compressed = p; + while (input_length > 0) { + num_to_read = min(input_length, (uint32_t)kBlockSize); + workmem_size = workmem_bytes_power_of_two; + if (num_to_read < kBlockSize) { + for (workmem_size = 9; + workmem_size < workmem_bytes_power_of_two; + ++workmem_size) { + if ((1 << (workmem_size-1)) >= num_to_read) + break; + } + } + p = csnappy_compress_fragment( + input, num_to_read, compressed, + working_memory, workmem_size); + written += (p - compressed); + compressed = p; + input_length -= num_to_read; + input += num_to_read; + } + *compressed_length = written; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Compressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_decompress.c b/drivers/staging/snappy/csnappy_decompress.c new file mode 100755 index 00000000..d05d8173 --- /dev/null +++ b/drivers/staging/snappy/csnappy_decompress.c @@ -0,0 +1,321 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ +static const uint32_t wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +/* + * Data stored per entry in lookup table: + * Range Bits-used Description + * ------------------------------------ + * 1..64 0..7 Literal/copy length encoded in opcode byte + * 0..7 8..10 Copy offset encoded in opcode byte / 256 + * 0..4 11..13 Extra bytes after opcode + * + * We use eight bits for the length even though 7 would have sufficed + * because of efficiency reasons: + * (1) Extracting a byte is faster than a bit-field + * (2) It properly aligns copy offset so we do not need a <<8 + */ +static const uint16_t char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +/* + * Copy "len" bytes from 
"src" to "op", one byte at a time. Used for + * handling COPY operations where the input and output regions may + * overlap. For example, suppose: + * src == "ab" + * op == src + 2 + * len == 20 + * After IncrementalCopy(src, op, len), the result will have + * eleven copies of "ab" + * ababababababababababab + * Note that this does not match the semantics of either memcpy() + * or memmove(). + */ +static inline void IncrementalCopy(const char *src, char *op, int len) +{ + DCHECK_GT(len, 0); + do { + *op++ = *src++; + } while (--len > 0); +} + +/* + * Equivalent to IncrementalCopy except that it can write up to ten extra + * bytes after the end of the copy, and that it is faster. + * + * The main part of this loop is a simple copy of eight bytes at a time until + * we've copied (at least) the requested amount of bytes. However, if op and + * src are less than eight bytes apart (indicating a repeating pattern of + * length < 8), we first need to expand the pattern in order to get the correct + * results. For instance, if the buffer looks like this, with the eight-byte + * and patterns marked as intervals: + * + * abxxxxxxxxxxxx + * [------] src + * [------] op + * + * a single eight-byte copy from to will repeat the pattern once, + * after which we can move two bytes without moving : + * + * ababxxxxxxxxxx + * [------] src + * [------] op + * + * and repeat the exercise until the two no longer overlap. + * + * This allows us to do very well in the special case of one single byte + * repeated many times, without taking a big hit for more general cases. + * + * The worst case of extra writing past the end of the match occurs when + * op - src == 1 and len == 1; the last copy will read from byte positions + * [0..7] and write to [4..11], whereas it was only supposed to write to + * position 1. Thus, ten excess bytes. + */ +static const int kMaxIncrementCopyOverflow = 10; +static inline void IncrementalCopyFastPath(const char *src, char *op, int len) +{ + while (op - src < 8) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + len -= op - src; + op += op - src; + } + while (len > 0) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + src += 8; + op += 8; + len -= 8; + } +} + + +/* A type that writes to a flat array. */ +struct SnappyArrayWriter { + char *base; + char *op; + char *op_limit; +}; + +static inline int +SAW__Append(struct SnappyArrayWriter *this, + const char *ip, uint32_t len, int allow_fast_path) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /*Fast path, used for the majority (about 90%) of dynamic invocations.*/ + if (allow_fast_path && len <= 16 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + memcpy(op, ip, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + +static inline int +SAW__AppendFromSelf(struct SnappyArrayWriter *this, + uint32_t offset, uint32_t len) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /* -1u catches offset==0 */ + if (op - this->base <= offset - 1u) + return CSNAPPY_E_DATA_MALFORMED; + /* Fast path, used for the majority (70-80%) of dynamic invocations. 
*/ + if (len <= 16 && offset >= 8 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); + } else if (space_left >= len + kMaxIncrementCopyOverflow) { + IncrementalCopyFastPath(op - offset, op, len); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + IncrementalCopy(op - offset, op, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + + +int +csnappy_get_uncompressed_length( + const char *src, + uint32_t src_len, + uint32_t *result) +{ + const char *src_base = src; + uint32_t shift = 0; + uint8_t c; + /* Length is encoded in 1..5 bytes */ + *result = 0; + for (;;) { + if (shift >= 32) + goto err_out; + if (src_len == 0) + goto err_out; + c = *(const uint8_t *)src++; + src_len -= 1; + *result |= (uint32_t)(c & 0x7f) << shift; + if (c < 128) + break; + shift += 7; + } + return src - src_base; +err_out: + return CSNAPPY_E_HEADER_BAD; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_get_uncompressed_length); +#endif + +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_remaining, + char *dst, + uint32_t *dst_len) +{ + struct SnappyArrayWriter writer; + uint32_t length, trailer, opword, extra_bytes; + int ret; + uint8_t opcode; + char scratch[5]; + writer.op = writer.base = dst; + writer.op_limit = writer.op + *dst_len; + while (src_remaining) { + if (unlikely(src_remaining < 5)) { + memcpy(scratch, src, src_remaining); + src = scratch; + } + opcode = *(const uint8_t *)src++; + opword = char_table[opcode]; + extra_bytes = opword >> 11; + trailer = get_unaligned_le32(src) & wordmask[extra_bytes]; + src += extra_bytes; + src_remaining -= 1 + extra_bytes; + length = opword & 0xff; + if (opcode & 0x3) { + trailer += opword & 0x700; + ret = SAW__AppendFromSelf(&writer, trailer, length); + if (ret < 0) + return ret; + } else { + length += trailer; + if (unlikely(src_remaining < length)) + return CSNAPPY_E_DATA_MALFORMED; + ret = src_remaining >= 16; + ret = SAW__Append(&writer, src, length, ret); + if (ret < 0) + return ret; + src += length; + src_remaining -= length; + } + } + *dst_len = writer.op - writer.base; + return CSNAPPY_E_OK; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress_noheader); +#endif + +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len) +{ + int n; + uint32_t olen = 0; + /* Read uncompressed length from the front of the compressed input */ + n = csnappy_get_uncompressed_length(src, src_len, &olen); + if (unlikely(n < CSNAPPY_E_OK)) + return n; + /* Protect against possible DoS attack */ + if (unlikely(olen > dst_len)) + return CSNAPPY_E_OUTPUT_INSUF; + return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Decompressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_internal.h b/drivers/staging/snappy/csnappy_internal.h new file mode 100755 index 00000000..6f1a5465 --- /dev/null +++ b/drivers/staging/snappy/csnappy_internal.h @@ -0,0 +1,83 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Various stubs for the open-source version of Snappy. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#ifndef CSNAPPY_INTERNAL_H_ +#define CSNAPPY_INTERNAL_H_ + +#ifndef __KERNEL__ +#include "csnappy_internal_userspace.h" +#else + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DCHECK(cond) if (!(cond)) \ + printk(KERN_DEBUG "assert failed @ %s:%i\n", \ + __FILE__, __LINE__) +#else +#define DCHECK(cond) +#endif + +#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) +#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) +#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) +#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) +#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) +#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) + +#define FindLSBSetNonZero(n) __builtin_ctz(n) +#define FindLSBSetNonZero64(n) __builtin_ctzll(n) + +#endif /* __KERNEL__ */ + +#define DCHECK_EQ(a, b) DCHECK(((a) == (b))) +#define DCHECK_NE(a, b) DCHECK(((a) != (b))) +#define DCHECK_GT(a, b) DCHECK(((a) > (b))) +#define DCHECK_GE(a, b) DCHECK(((a) >= (b))) +#define DCHECK_LT(a, b) DCHECK(((a) < (b))) +#define DCHECK_LE(a, b) DCHECK(((a) <= (b))) + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; + +#endif /* CSNAPPY_INTERNAL_H_ */ diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig index 3bec4dba..24027b43 100755 --- a/drivers/staging/zram/Kconfig +++ b/drivers/staging/zram/Kconfig @@ -6,8 +6,6 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). @@ -28,3 +26,15 @@ config ZRAM_DEBUG help This option adds additional debugging code to the compressed RAM block device driver. 
+config ZRAM_LZO + bool "LZO compression" + default y + depends on ZRAM + select LZO_COMPRESS + select LZO_DECOMPRESS + +config ZRAM_SNAPPY + bool "Snappy compression" + depends on ZRAM + select SNAPPY_COMPRESS + select SNAPPY_DECOMPRESS diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 88383651..c61261ac 100755 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -29,12 +29,90 @@ #include #include #include -#include #include #include #include "zram_drv.h" +#if defined(CONFIG_ZRAM_LZO) +#include +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor lzo_compressor = { + .name = "LZO", + .workmem_bytes = LZO1X_MEM_COMPRESS, + .compress = &lzo1x_1_compress, + .decompress = &lzo1x_decompress_safe +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE LZO1X_MEM_COMPRESS +#define COMPRESS(s, sl, d, dl, wm) \ + lzo1x_1_compress(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + lzo1x_decompress_safe(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_LZO) */ + +#if defined(CONFIG_ZRAM_SNAPPY) +#include "../snappy/csnappy.h" /* if built in drivers/staging */ +#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? (15) : (PAGE_SHIFT+1)) +static int +snappy_compress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem) +{ + const unsigned char *end = csnappy_compress_fragment( + src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER); + *dst_len = end - dst; + return 0; +} +static int +snappy_decompress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len) +{ + uint32_t dst_len_ = (uint32_t)*dst_len; + int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_); + *dst_len = (size_t)dst_len_; + return ret; +} +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor snappy_compressor = { + .name = "SNAPPY", + .workmem_bytes = (1 << WMSIZE_ORDER), + .compress = &snappy_compress_, + .decompress = &snappy_decompress_ +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE (1 << WMSIZE_ORDER) +#define COMPRESS(s, sl, d, dl, wm) \ + snappy_compress_(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + snappy_decompress_(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_SNAPPY) */ + +#ifdef MULTIPLE_COMPRESSORS +const struct zram_compressor * const zram_compressors[] = { +#if defined(CONFIG_ZRAM_LZO) + &lzo_compressor, +#endif +#if defined(CONFIG_ZRAM_SNAPPY) + &snappy_compressor, +#endif + NULL +}; +#define WMSIZE (zram->compressor->workmem_bytes) +#define COMPRESS(s, sl, d, dl, wm) \ + (zram->compressor->compress(s, sl, d, dl, wm)) +#define DECOMPRESS(s, sl, d, dl) \ + (zram->compressor->decompress(s, sl, d, dl)) +#endif /* MULTIPLE_COMPRESSORS */ + /* Globals */ static int zram_major; struct zram *zram_devices; @@ -257,7 +335,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, cmem = kmap_atomic(zram->table[index].page, KM_USER1) + zram->table[index].offset; - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), uncmem, &clen); @@ -271,7 +349,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(user_mem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -305,13 +383,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index) return 0; } - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), mem, &clen); kunmap_atomic(cmem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -377,18 +455,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + COMPRESS(uncmem, PAGE_SIZE, src, &clen, zram->compress_workmem); kunmap_atomic(user_mem, KM_USER0); if (is_partial_io(bvec)) kfree(uncmem); - if (unlikely(ret != LZO_E_OK)) { - pr_err("Compression failed! err=%d\n", ret); - goto out; - } - /* * Page is incompressible. Store it as-is (uncompressed) * since we do not want to return too many disk write @@ -648,7 +721,7 @@ int zram_init_device(struct zram *zram) zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL); if (!zram->compress_workmem) { pr_err("Error allocating compressor working memory!\n"); ret = -ENOMEM; @@ -753,6 +826,10 @@ static int create_device(struct zram *zram, int device_id) /* Actual capacity set using syfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); + /* Can be changed using sysfs (/sys/block/zram/compressor) */ +#ifdef MULTIPLE_COMPRESSORS + zram->compressor = zram_compressors[0]; +#endif /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 31617ee7..6ccb855b 100755 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -66,6 +66,13 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) == 0 +#error At least one of CONFIG_ZRAM_LZO, CONFIG_ZRAM_SNAPPY must be defined! 
+#endif +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) > 1 +#define MULTIPLE_COMPRESSORS +#endif + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* Page is stored uncompressed */ @@ -103,6 +110,9 @@ struct zram_stats { struct zram { struct xv_pool *mem_pool; +#ifdef MULTIPLE_COMPRESSORS + const struct zram_compressor *compressor; +#endif void *compress_workmem; void *compress_buffer; struct table *table; @@ -132,5 +142,25 @@ extern struct attribute_group zram_disk_attr_group; extern int zram_init_device(struct zram *zram); extern void __zram_reset_device(struct zram *zram); +#ifdef MULTIPLE_COMPRESSORS +struct zram_compressor { + const char *name; + int (*compress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem); + int (*decompress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len); + unsigned workmem_bytes; +}; + +extern const struct zram_compressor * const zram_compressors[]; +#endif + #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index 41e51a2b..231924e0 100755 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -76,6 +76,57 @@ static ssize_t disksize_store(struct device *dev, return len; } +#ifdef MULTIPLE_COMPRESSORS +static ssize_t compressor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char * const buf_base = buf; + const struct zram_compressor *p, *curr; + unsigned int i = 0; + struct zram *zram = dev_to_zram(dev); + curr = zram->compressor; + p = zram_compressors[i]; + while (p) { + if (curr == p) + buf += sprintf(buf, "*"); + buf += sprintf(buf, "%u - %s\n", i, p->name); + p = zram_compressors[++i]; + } + return buf - buf_base; +} + +static ssize_t compressor_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + const struct zram_compressor *p; + unsigned long requested; + unsigned int i = 0; + int ret; + struct zram *zram = dev_to_zram(dev); + + if (zram->init_done) { + pr_info("Cannot change compressor for initialized device\n"); + return -EBUSY; + } + + ret = strict_strtoul(buf, 10, &requested); + if (ret) + return ret; + + p = zram_compressors[i]; + while (p && (i < requested)) + p = zram_compressors[++i]; + + if (!p) { + pr_info("No compressor with index #%lu\n", requested); + return -EINVAL; + } + + zram->compressor = p; + return len; +} +#endif /* MULTIPLE_COMPRESSORS */ + static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -194,6 +245,10 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +#ifdef MULTIPLE_COMPRESSORS +static DEVICE_ATTR(compressor, S_IRUGO | S_IWUSR, + compressor_show, compressor_store); +#endif static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); @@ -208,6 +263,9 @@ static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static struct attribute *zram_disk_attrs[] = { +#ifdef MULTIPLE_COMPRESSORS + &dev_attr_compressor.attr, +#endif &dev_attr_disksize.attr, &dev_attr_initstate.attr, &dev_attr_reset.attr, From c9ff1491834a531715dd5658ccf90b524545b7a9 Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 23 Aug 2012 12:45:29 +0800 Subject: [PATCH 123/155] disable KSM --- arch/arm/configs/htcleo_defconfig | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 8d26b82b..b6f35061 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -401,7 +401,7 @@ CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_HAVE_MLOCK=y CONFIG_HAVE_MLOCKED_PAGE_BIT=y -CONFIG_KSM=y +# CONFIG_KSM is not set CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ALIGNMENT_TRAP=y CONFIG_ALLOW_CPU_ALIGNMENT=y From f9c1be0ea35269eb4f687cf114c5b302de7b453f Mon Sep 17 00:00:00 2001 From: securecrt Date: Thu, 23 Aug 2012 13:06:34 +0800 Subject: [PATCH 124/155] fixed merge problem --- drivers/staging/android/lowmemorykiller.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 5430f578..5930a813 100755 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -40,14 +40,11 @@ #include #include #include -<<<<<<< HEAD -======= #ifdef CONFIG_SWAP #include #include #endif ->>>>>>> ics_HWA static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -124,6 +121,8 @@ static int lmk_hotplug_callback(struct notifier_block *self, } #endif + + static void lowmem_notify_killzone_approach(void); static inline void get_free_ram(int *other_free, int *other_file) From b989638f53374c1aa496d0bcaa76528720f6d033 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Thu, 23 Aug 2012 22:41:48 +0800 Subject: [PATCH 125/155] fixed merge problem --- drivers/staging/android/lowmemorykiller.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 5430f578..f9a21b86 100755 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -40,14 +40,11 @@ #include #include #include -<<<<<<< HEAD -======= #ifdef CONFIG_SWAP #include #include #endif ->>>>>>> ics_HWA static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { From 035e8b5999678444c37ee4e7e17a1d0c0a5dc9d1 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sun, 9 Sep 2012 22:55:52 +0800 Subject: [PATCH 126/155] change build sh --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 3a29e816..39b2341f 100755 --- a/build.sh +++ b/build.sh @@ -9,7 +9,7 @@ if [ -f arch/arm/boot/zImage ]; then mkdir -p $KERNELBASEDIR/ rm -rf $KERNELBASEDIR/boot/* -rm -rf $KERNELBASEDIR/system/* +rm -rf $KERNELBASEDIR/system/lib/modules/* mkdir -p $KERNELBASEDIR/boot mkdir -p $KERNELBASEDIR/system/ mkdir -p $KERNELBASEDIR/system/lib/ @@ -24,7 +24,7 @@ find -iname *.ko | xargs -i -t cp {} . rm -rf $KERNELBASEDIR/system/lib/modules/lib stat $KERNELBASEDIR/boot/zImage cd ../../../ -zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip boot system META-INF +zip -r tytung_HWA_kernel.`date +"%Y%m%d_%H_%M"`.zip boot system META-INF work nfo.prop else echo "Kernel STUCK in BUILD! 
no zImage exist" fi From b9ead040d28175b3b783c64f7f73a9a6d8e74bed Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 21 Sep 2012 01:06:00 +0800 Subject: [PATCH 127/155] enable -pipe --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index a73caaf6..f35b1588 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ endif OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -#KBUILD_CFLAGS +=-pipe +KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) From c8d5db022ffee3d4ee8a6b32710d229a9baf9283 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Fri, 21 Sep 2012 01:06:29 +0800 Subject: [PATCH 128/155] ashmem: Implement read(2) in ashmem driver ashmem: Fix ASHMEM_SET_PROT_MASK. ashmem: Support lseek(2) in ashmem driver ashmem: Fix the build failure when OUTER_CACHE is enabled ashmem: Fix ashmem vm range comparison to stop roll-over --- mm/ashmem.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 143 insertions(+), 3 deletions(-) mode change 100644 => 100755 mm/ashmem.c diff --git a/mm/ashmem.c b/mm/ashmem.c old mode 100644 new mode 100755 index 5e059283..0404e21f --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,9 +29,10 @@ #include #include #include +#include -#define ASHMEM_NAME_PREFIX "" -#define ASHMEM_NAME_PREFIX_LEN 0 +#define ASHMEM_NAME_PREFIX "dev/ashmem/" +#define ASHMEM_NAME_PREFIX_LEN (sizeof(ASHMEM_NAME_PREFIX) - 1) #define ASHMEM_FULL_NAME_LEN (ASHMEM_NAME_LEN + ASHMEM_NAME_PREFIX_LEN) /* @@ -45,6 +46,8 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ + unsigned long vm_start; /* Start address of vm_area + * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -178,7 +181,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = nonseekable_open(inode, file); + ret = generic_file_open(inode, file); if (unlikely(ret)) return ret; @@ -187,6 +190,7 @@ static int ashmem_open(struct inode *inode, struct file *file) return -ENOMEM; INIT_LIST_HEAD(&asma->unpinned_list); + memcpy(asma->name, ASHMEM_NAME_PREFIX, ASHMEM_NAME_PREFIX_LEN); asma->prot_mask = PROT_MASK; file->private_data = asma; @@ -210,6 +214,67 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } +static ssize_t ashmem_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct ashmem_area *asma = file->private_data; + int ret = 0; + + mutex_lock(&ashmem_mutex); + + /* If size is not set, or set to 0, always return EOF. 
*/ + if (asma->size == 0) { + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->read(asma->file, buf, len, pos); + if (ret < 0) { + goto out; + } + + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) +{ + struct ashmem_area *asma = file->private_data; + int ret; + + mutex_lock(&ashmem_mutex); + + if (asma->size == 0) { + ret = -EINVAL; + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->llseek(asma->file, offset, origin); + if (ret < 0) { + goto out; + } + + /** Copy f_pos from backing file, since f_ops->llseek() sets it */ + file->f_pos = asma->file->f_pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + static inline unsigned long calc_vm_may_flags(unsigned long prot) { @@ -264,6 +329,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; + asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -564,6 +630,69 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } +#ifdef CONFIG_OUTER_CACHE +static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) +{ + unsigned int physaddr = 0; + pgd_t *pgd_ptr = NULL; + pmd_t *pmd_ptr = NULL; + pte_t *pte_ptr = NULL, pte; + + spin_lock(¤t->mm->page_table_lock); + pgd_ptr = pgd_offset(current->mm, virtaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + pr_err("Failed to convert virtaddr %x to pgd_ptr\n", + virtaddr); + goto done; + } + + pmd_ptr = pmd_offset(pgd_ptr, virtaddr); + if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { + pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", + (void *)pgd_ptr); + goto done; + } + + pte_ptr = pte_offset_map(pmd_ptr, virtaddr); + if (!pte_ptr) { + pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", + (void *)pmd_ptr); + goto done; + } + pte = *pte_ptr; + physaddr = pte_pfn(pte); + pte_unmap(pte_ptr); +done: + spin_unlock(¤t->mm->page_table_lock); + physaddr <<= PAGE_SHIFT; + return physaddr; +} +#endif + +static int ashmem_cache_op(struct ashmem_area *asma, + void (*cache_func)(unsigned long vstart, unsigned long length, + unsigned long pstart)) +{ +#ifdef CONFIG_OUTER_CACHE + unsigned long vaddr; +#endif + mutex_lock(&ashmem_mutex); +#ifndef CONFIG_OUTER_CACHE + cache_func(asma->vm_start, asma->size, 0); +#else + for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; + vaddr += PAGE_SIZE) { + unsigned long physaddr; + physaddr = virtaddr_to_physaddr(vaddr); + if (!physaddr) + return -EINVAL; + cache_func(vaddr, PAGE_SIZE, physaddr); + } +#endif + mutex_unlock(&ashmem_mutex); + return 0; +} + static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -604,6 +733,15 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink(ret, GFP_KERNEL); } break; + case ASHMEM_CACHE_FLUSH_RANGE: + ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); + break; + case ASHMEM_CACHE_CLEAN_RANGE: + ret = ashmem_cache_op(asma, &clean_caches); + break; + case ASHMEM_CACHE_INV_RANGE: + ret = ashmem_cache_op(asma, &invalidate_caches); + break; } return ret; @@ -666,6 +804,8 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, + .read = 
ashmem_read, + .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, From c6861409a88ed1299f90e4eea57b07522234b995 Mon Sep 17 00:00:00 2001 From: securecrt Date: Fri, 21 Sep 2012 13:04:50 +0800 Subject: [PATCH 129/155] ashmem: Implement read(2) in ashmem driver ashmem: Fix ASHMEM_SET_PROT_MASK. ashmem: Support lseek(2) in ashmem driver ashmem: Fix the build failure when OUTER_CACHE is enabled ashmem: Fix ashmem vm range comparison to stop roll-over --- mm/ashmem.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 143 insertions(+), 3 deletions(-) diff --git a/mm/ashmem.c b/mm/ashmem.c index 5e059283..0404e21f 100644 --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,9 +29,10 @@ #include #include #include +#include -#define ASHMEM_NAME_PREFIX "" -#define ASHMEM_NAME_PREFIX_LEN 0 +#define ASHMEM_NAME_PREFIX "dev/ashmem/" +#define ASHMEM_NAME_PREFIX_LEN (sizeof(ASHMEM_NAME_PREFIX) - 1) #define ASHMEM_FULL_NAME_LEN (ASHMEM_NAME_LEN + ASHMEM_NAME_PREFIX_LEN) /* @@ -45,6 +46,8 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ + unsigned long vm_start; /* Start address of vm_area + * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -178,7 +181,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = nonseekable_open(inode, file); + ret = generic_file_open(inode, file); if (unlikely(ret)) return ret; @@ -187,6 +190,7 @@ static int ashmem_open(struct inode *inode, struct file *file) return -ENOMEM; INIT_LIST_HEAD(&asma->unpinned_list); + memcpy(asma->name, ASHMEM_NAME_PREFIX, ASHMEM_NAME_PREFIX_LEN); asma->prot_mask = PROT_MASK; file->private_data = asma; @@ -210,6 +214,67 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } +static ssize_t ashmem_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct ashmem_area *asma = file->private_data; + int ret = 0; + + mutex_lock(&ashmem_mutex); + + /* If size is not set, or set to 0, always return EOF. 
*/ + if (asma->size == 0) { + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->read(asma->file, buf, len, pos); + if (ret < 0) { + goto out; + } + + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) +{ + struct ashmem_area *asma = file->private_data; + int ret; + + mutex_lock(&ashmem_mutex); + + if (asma->size == 0) { + ret = -EINVAL; + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->llseek(asma->file, offset, origin); + if (ret < 0) { + goto out; + } + + /** Copy f_pos from backing file, since f_ops->llseek() sets it */ + file->f_pos = asma->file->f_pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + static inline unsigned long calc_vm_may_flags(unsigned long prot) { @@ -264,6 +329,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; + asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -564,6 +630,69 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } +#ifdef CONFIG_OUTER_CACHE +static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) +{ + unsigned int physaddr = 0; + pgd_t *pgd_ptr = NULL; + pmd_t *pmd_ptr = NULL; + pte_t *pte_ptr = NULL, pte; + + spin_lock(¤t->mm->page_table_lock); + pgd_ptr = pgd_offset(current->mm, virtaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + pr_err("Failed to convert virtaddr %x to pgd_ptr\n", + virtaddr); + goto done; + } + + pmd_ptr = pmd_offset(pgd_ptr, virtaddr); + if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { + pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", + (void *)pgd_ptr); + goto done; + } + + pte_ptr = pte_offset_map(pmd_ptr, virtaddr); + if (!pte_ptr) { + pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", + (void *)pmd_ptr); + goto done; + } + pte = *pte_ptr; + physaddr = pte_pfn(pte); + pte_unmap(pte_ptr); +done: + spin_unlock(¤t->mm->page_table_lock); + physaddr <<= PAGE_SHIFT; + return physaddr; +} +#endif + +static int ashmem_cache_op(struct ashmem_area *asma, + void (*cache_func)(unsigned long vstart, unsigned long length, + unsigned long pstart)) +{ +#ifdef CONFIG_OUTER_CACHE + unsigned long vaddr; +#endif + mutex_lock(&ashmem_mutex); +#ifndef CONFIG_OUTER_CACHE + cache_func(asma->vm_start, asma->size, 0); +#else + for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; + vaddr += PAGE_SIZE) { + unsigned long physaddr; + physaddr = virtaddr_to_physaddr(vaddr); + if (!physaddr) + return -EINVAL; + cache_func(vaddr, PAGE_SIZE, physaddr); + } +#endif + mutex_unlock(&ashmem_mutex); + return 0; +} + static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -604,6 +733,15 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink(ret, GFP_KERNEL); } break; + case ASHMEM_CACHE_FLUSH_RANGE: + ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); + break; + case ASHMEM_CACHE_CLEAN_RANGE: + ret = ashmem_cache_op(asma, &clean_caches); + break; + case ASHMEM_CACHE_INV_RANGE: + ret = ashmem_cache_op(asma, &invalidate_caches); + break; } return ret; @@ -666,6 +804,8 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, + .read = 
ashmem_read, + .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, From d6a9cabcb54e234ab0eece856626b5067f594ec5 Mon Sep 17 00:00:00 2001 From: securecrt Date: Fri, 21 Sep 2012 13:59:41 +0800 Subject: [PATCH 130/155] disable GENTLE FAIR SLEEPERS Sleeper Fairness is a concept used by CFS which treat sleeping/waiting tasks as if they were in a run queue. This implies tasks which spend most of the time waiting for an user input and such will get a fair share of CPU when they need it. Disabling Gentle Fair Sleepers could improve UI responsiveness. --- kernel/sched_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d940835..152922a4 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -10,7 +10,7 @@ SCHED_FEAT(FAIR_SLEEPERS, 1) * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * By not normalizing the sleep time, heavy tasks get an effective From 4d2e9936aa9efff2b3e6162a8f7f89454a2daf5d Mon Sep 17 00:00:00 2001 From: securecrt Date: Fri, 21 Sep 2012 14:11:11 +0800 Subject: [PATCH 131/155] disable GENTLE FAIR SLEEPERS Sleeper Fairness is a concept used by CFS which treat sleeping/waiting tasks as if they were in a run queue. This implies tasks which spend most of the time waiting for an user input and such will get a fair share of CPU when they need it. Disabling Gentle Fair Sleepers could improve UI responsiveness. --- kernel/sched_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d940835..152922a4 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -10,7 +10,7 @@ SCHED_FEAT(FAIR_SLEEPERS, 1) * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * By not normalizing the sleep time, heavy tasks get an effective From 00e4d55f8a5babfc0d6f7f256cfc7113d9d8e30d Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 22 Sep 2012 18:49:06 +0800 Subject: [PATCH 132/155] base: genlock: handle error while creating lock/handle inode base: genlock: add magic to protect attach from non-genlock file base: genlock: protect kref counting with spinlock --- drivers/base/genlock.c | 50 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) mode change 100755 => 100644 drivers/base/genlock.c diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c old mode 100755 new mode 100644 index b5f8e42e..8c064888 --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -35,7 +35,15 @@ #define GENLOCK_LOG_ERR(fmt, args...) 
\ pr_err("genlock: %s: " fmt, __func__, ##args) +/* The genlock magic stored in the kernel private data is used to protect + * against the possibility of user space passing a valid fd to a + * non-genlock file for genlock_attach_lock() + */ +#define GENLOCK_MAGIC_OK 0xD2EAD10C +#define GENLOCK_MAGIC_BAD 0xD2EADBAD + struct genlock { + unsigned int magic; /* Magic for attach verification */ struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ wait_queue_head_t queue; /* Holding pen for processes pending lock */ @@ -57,7 +65,7 @@ struct genlock_handle { * released while another process tries to attach it */ -static DEFINE_SPINLOCK(genlock_file_lock); +static DEFINE_SPINLOCK(genlock_ref_lock); static void genlock_destroy(struct kref *kref) { @@ -69,10 +77,9 @@ static void genlock_destroy(struct kref *kref) * still active after the lock gets released */ - spin_lock(&genlock_file_lock); if (lock->file) lock->file->private_data = NULL; - spin_unlock(&genlock_file_lock); + lock->magic = GENLOCK_MAGIC_BAD; kfree(lock); } @@ -110,6 +117,7 @@ static const struct file_operations genlock_fops = { struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; + void *ret; if (IS_ERR_OR_NULL(handle)) { GENLOCK_LOG_ERR("Invalid handle\n"); @@ -131,6 +139,7 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) init_waitqueue_head(&lock->queue); spin_lock_init(&lock->lock); + lock->magic = GENLOCK_MAGIC_OK; lock->state = _UNLOCKED; /* @@ -138,8 +147,13 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) * other processes */ - lock->file = anon_inode_getfile("genlock", &genlock_fops, - lock, O_RDWR); + ret = anon_inode_getfile("genlock", &genlock_fops, lock, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create lock inode\n"); + kfree(lock); + return ret; + } + lock->file = ret; /* Attach the new lock to the handle */ handle->lock = lock; @@ -204,21 +218,30 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) * released and then attached */ - spin_lock(&genlock_file_lock); + spin_lock(&genlock_ref_lock); lock = file->private_data; - spin_unlock(&genlock_file_lock); fput(file); if (lock == NULL) { GENLOCK_LOG_ERR("File descriptor is invalid\n"); - return ERR_PTR(-EINVAL); + goto fail_invalid; + } + + if (lock->magic != GENLOCK_MAGIC_OK) { + GENLOCK_LOG_ERR("Magic is invalid - 0x%X\n", lock->magic); + goto fail_invalid; } handle->lock = lock; kref_get(&lock->refcount); + spin_unlock(&genlock_ref_lock); return lock; + +fail_invalid: + spin_unlock(&genlock_ref_lock); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL(genlock_attach_lock); @@ -596,7 +619,9 @@ static void genlock_release_lock(struct genlock_handle *handle) } spin_unlock_irqrestore(&handle->lock->lock, flags); + spin_lock(&genlock_ref_lock); kref_put(&handle->lock->refcount, genlock_destroy); + spin_unlock(&genlock_ref_lock); handle->lock = NULL; handle->active = 0; } @@ -642,12 +667,19 @@ static struct genlock_handle *_genlock_get_handle(void) struct genlock_handle *genlock_get_handle(void) { + void *ret; struct genlock_handle *handle = _genlock_get_handle(); if (IS_ERR(handle)) return handle; - handle->file = anon_inode_getfile("genlock-handle", + ret = anon_inode_getfile("genlock-handle", &genlock_handle_fops, handle, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create handle inode\n"); + kfree(handle); + return ret; + } + handle->file = ret; return handle; 
} From d14b09ad9bd384aa97355616e32554704c859b1c Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 22 Sep 2012 18:54:01 +0800 Subject: [PATCH 133/155] Merge ics_HWA #00e4d55 --- drivers/base/genlock.c | 50 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c index b5f8e42e..8c064888 100755 --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -35,7 +35,15 @@ #define GENLOCK_LOG_ERR(fmt, args...) \ pr_err("genlock: %s: " fmt, __func__, ##args) +/* The genlock magic stored in the kernel private data is used to protect + * against the possibility of user space passing a valid fd to a + * non-genlock file for genlock_attach_lock() + */ +#define GENLOCK_MAGIC_OK 0xD2EAD10C +#define GENLOCK_MAGIC_BAD 0xD2EADBAD + struct genlock { + unsigned int magic; /* Magic for attach verification */ struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ wait_queue_head_t queue; /* Holding pen for processes pending lock */ @@ -57,7 +65,7 @@ struct genlock_handle { * released while another process tries to attach it */ -static DEFINE_SPINLOCK(genlock_file_lock); +static DEFINE_SPINLOCK(genlock_ref_lock); static void genlock_destroy(struct kref *kref) { @@ -69,10 +77,9 @@ static void genlock_destroy(struct kref *kref) * still active after the lock gets released */ - spin_lock(&genlock_file_lock); if (lock->file) lock->file->private_data = NULL; - spin_unlock(&genlock_file_lock); + lock->magic = GENLOCK_MAGIC_BAD; kfree(lock); } @@ -110,6 +117,7 @@ static const struct file_operations genlock_fops = { struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; + void *ret; if (IS_ERR_OR_NULL(handle)) { GENLOCK_LOG_ERR("Invalid handle\n"); @@ -131,6 +139,7 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) init_waitqueue_head(&lock->queue); spin_lock_init(&lock->lock); + lock->magic = GENLOCK_MAGIC_OK; lock->state = _UNLOCKED; /* @@ -138,8 +147,13 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) * other processes */ - lock->file = anon_inode_getfile("genlock", &genlock_fops, - lock, O_RDWR); + ret = anon_inode_getfile("genlock", &genlock_fops, lock, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create lock inode\n"); + kfree(lock); + return ret; + } + lock->file = ret; /* Attach the new lock to the handle */ handle->lock = lock; @@ -204,21 +218,30 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) * released and then attached */ - spin_lock(&genlock_file_lock); + spin_lock(&genlock_ref_lock); lock = file->private_data; - spin_unlock(&genlock_file_lock); fput(file); if (lock == NULL) { GENLOCK_LOG_ERR("File descriptor is invalid\n"); - return ERR_PTR(-EINVAL); + goto fail_invalid; + } + + if (lock->magic != GENLOCK_MAGIC_OK) { + GENLOCK_LOG_ERR("Magic is invalid - 0x%X\n", lock->magic); + goto fail_invalid; } handle->lock = lock; kref_get(&lock->refcount); + spin_unlock(&genlock_ref_lock); return lock; + +fail_invalid: + spin_unlock(&genlock_ref_lock); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL(genlock_attach_lock); @@ -596,7 +619,9 @@ static void genlock_release_lock(struct genlock_handle *handle) } spin_unlock_irqrestore(&handle->lock->lock, flags); + spin_lock(&genlock_ref_lock); kref_put(&handle->lock->refcount, genlock_destroy); + spin_unlock(&genlock_ref_lock); handle->lock = NULL; handle->active = 0; } @@ -642,12 
+667,19 @@ static struct genlock_handle *_genlock_get_handle(void) struct genlock_handle *genlock_get_handle(void) { + void *ret; struct genlock_handle *handle = _genlock_get_handle(); if (IS_ERR(handle)) return handle; - handle->file = anon_inode_getfile("genlock-handle", + ret = anon_inode_getfile("genlock-handle", &genlock_handle_fops, handle, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create handle inode\n"); + kfree(handle); + return ret; + } + handle->file = ret; return handle; } From 7e729493811053d7c241bcceb251c45ed411c236 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 22 Sep 2012 20:39:45 +0800 Subject: [PATCH 134/155] Staging: android: binder: Add some missing binder_stat_br calls Cached thread return errors, death notifications and new looper requests were not included in the stats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Iabe14b351b662d3f63009ecb3900f92fc3d72cc4 Signed-off-by: Arve Hjønnevåg --- drivers/staging/android/binder.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index c44eb407..bd8ac9fd 100755 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2241,6 +2241,7 @@ retry: if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error2); if (ptr == end) goto done; thread->return_error2 = BR_OK; @@ -2248,6 +2249,7 @@ retry: if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error); thread->return_error = BR_OK; goto done; } @@ -2403,6 +2405,7 @@ retry: if (put_user(death->cookie, (void * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); + binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "binder: %d:%d %s %p\n", proc->pid, thread->pid, @@ -2510,6 +2513,7 @@ done: proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } return 0; } From 0a250c8bdfe51b05b97d56181d7b61c4d7b181e4 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sat, 22 Sep 2012 20:44:03 +0800 Subject: [PATCH 135/155] Staging: android: binder: Add some missing binder_stat_br calls Cached thread return errors, death notifications and new looper requests were not included in the stats. 
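For context on the calls being added in the two binder patches above: binder_stat_br() only bumps the BR_* return-code counters that the driver exposes through its debugfs stats, so any path that hands a BR command to userspace without calling it silently under-counts that command. A minimal sketch of the helper, paraphrased from the binder driver of this era (not part of this patch):

static void binder_stat_br(struct binder_proc *proc,
			   struct binder_thread *thread, uint32_t cmd)
{
	/* index the counters by the ioctl number of the BR_* command */
	if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) {
		binder_stats.br[_IOC_NR(cmd)]++;
		proc->stats.br[_IOC_NR(cmd)]++;
		thread->stats.br[_IOC_NR(cmd)]++;
	}
}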
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Iabe14b351b662d3f63009ecb3900f92fc3d72cc4 Signed-off-by: Arve Hjønnevåg --- drivers/staging/android/binder.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index c44eb407..bd8ac9fd 100755 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2241,6 +2241,7 @@ retry: if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error2); if (ptr == end) goto done; thread->return_error2 = BR_OK; @@ -2248,6 +2249,7 @@ retry: if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error); thread->return_error = BR_OK; goto done; } @@ -2403,6 +2405,7 @@ retry: if (put_user(death->cookie, (void * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); + binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "binder: %d:%d %s %p\n", proc->pid, thread->pid, @@ -2510,6 +2513,7 @@ done: proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } return 0; } From 499a1d65edcc6240e65bf1ce676ab4401c068ee6 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 23 Sep 2012 19:28:36 +0800 Subject: [PATCH 136/155] Revert "disable KSM" This reverts commit c9ff1491834a531715dd5658ccf90b524545b7a9. --- arch/arm/configs/htcleo_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 8b2d6ca4..eabfe521 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -401,7 +401,7 @@ CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_HAVE_MLOCK=y CONFIG_HAVE_MLOCKED_PAGE_BIT=y -# CONFIG_KSM is not set +CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ALIGNMENT_TRAP=y CONFIG_ALLOW_CPU_ALIGNMENT=y From 1b9c88acd920cff4f729a7336669b32486d27412 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 23 Sep 2012 19:30:24 +0800 Subject: [PATCH 137/155] htcleo: updated htcleo_defconfig to tytung_HWA_r3.5 --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index eabfe521..d06a9447 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Wed Aug 1 00:15:06 CST 2012 +# Sun Sep 23 17:26:43 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r3.4" +CONFIG_LOCALVERSION="_tytung_HWA_r3.5" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From 7553b3788da08c1e7b44d1dba41ce8f4bacb59cf Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Sun, 23 Sep 2012 22:39:34 +0800 Subject: [PATCH 138/155] video: msm: add mdp version to id string,and put a bogus panel id --- drivers/video/msm/msm_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/msm/msm_fb.c b/drivers/video/msm/msm_fb.c index 82e110ce..028a1c7c 100644 --- a/drivers/video/msm/msm_fb.c +++ 
b/drivers/video/msm/msm_fb.c @@ -992,7 +992,7 @@ static void setup_fb_info(struct msmfb_info *msmfb) int r; /* finish setting up the fb_info struct */ - strncpy(fb_info->fix.id, "msmfb", 16); + strncpy(fb_info->fix.id, "msmfb31_0", 16); fb_info->fix.ypanstep = 1; fb_info->fbops = &msmfb_ops; From 582f409d661d1a813e8fe2f850678c87cf3df2ba Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 24 Sep 2012 22:35:24 +0800 Subject: [PATCH 139/155] tweaks iosched for better android performance --- block/deadline-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca..d5873c6c 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,10 +17,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int read_expire = HZ / 4; /* max time before a read is submitted. */ static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one +static const int writes_starved = 4; /* max times reads can starve a write */ +static const int fifo_batch = 1; /* # of sequential requests treated as one by the above parameters. For throughput. */ struct deadline_data { @@ -362,7 +362,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = 0; dd->fifo_batch = fifo_batch; return dd; } From 2931196a527410ba0e7f139c809500833510b10c Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 24 Sep 2012 22:36:20 +0800 Subject: [PATCH 140/155] remove the compile warnings --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58ae8e00..aabe5a8d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; From bda745315c78616ca1a2474dc2f619675d849505 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 24 Sep 2012 22:35:24 +0800 Subject: [PATCH 141/155] tweaks iosched for better android performance --- block/deadline-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca..d5873c6c 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,10 +17,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int read_expire = HZ / 4; /* max time before a read is submitted. */ static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one +static const int writes_starved = 4; /* max times reads can starve a write */ +static const int fifo_batch = 1; /* # of sequential requests treated as one by the above parameters. For throughput. 
*/ struct deadline_data { @@ -362,7 +362,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = 0; dd->fifo_batch = fifo_batch; return dd; } From 5155c8ec93be8af9ec27d6756a92e1cfb02917d7 Mon Sep 17 00:00:00 2001 From: SecureCRT Date: Mon, 24 Sep 2012 22:36:20 +0800 Subject: [PATCH 142/155] remove the compile warnings --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58ae8e00..aabe5a8d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; From f9b8788a89c7c4c64c951ed907ab8e8596aa106c Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 13 Oct 2012 02:28:16 +0800 Subject: [PATCH 143/155] drivers: power: enable fast_charge by default --- drivers/power/ds2746_battery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c index 885ed338..19e97980 100644 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -73,7 +73,7 @@ static struct wake_lock vbus_wake_lock; #define PROC_FAST_CHARGE_NAME "fast_charge" static struct proc_dir_entry *fast_charge; -static int allow_fast_charge = 0; +static int allow_fast_charge = 1; static int proc_read_fast_charge(char *page, char **start, off_t off, int count, int *eof, void *data) From e42ef086be4304013f376b55ba4122a011d5a875 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 14 Oct 2012 18:15:16 +0800 Subject: [PATCH 144/155] Revert "reduced the PMEM_ADSP size as the HW decoder still can't work on HD2" This reverts commit 411b4bcb906fc29b7c4776fa564ec8aa6329834c. 
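The deadline values hard-coded in the iosched patches above (read_expire, writes_starved, fifo_batch, front_merges) are also exposed per device under /sys/block/<dev>/queue/iosched/, so the same tuning can be tried at runtime before baking it into the source. A small sketch, assuming the flash device shows up as mmcblk0 and HZ=100 (so HZ/4 corresponds to the 250 ms written below); both are assumptions, not taken from this series:

#include <stdio.h>

/* Write one deadline tunable for mmcblk0 (device name is an assumption). */
static int set_iosched(const char *knob, const char *value)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/block/mmcblk0/queue/iosched/%s", knob);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", value);
	return fclose(f);
}

int main(void)
{
	set_iosched("read_expire", "250");	/* HZ/4 in milliseconds at HZ=100 */
	set_iosched("writes_starved", "4");
	set_iosched("fifo_batch", "1");
	set_iosched("front_merges", "0");
	return 0;
}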
--- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 8d633974..1a4cae6b 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -43,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x01800000 +#define MSM_PMEM_ADSP_SIZE 0x02900000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 From b9cddc7f369c99dd7fc7dfca66fd75e30831bc74 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 14 Oct 2012 18:16:44 +0800 Subject: [PATCH 145/155] remove -ics flag --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 958cbb0f..acde5126 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -ics NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* From facd1b56352443da9dcde0a84b7641622a881412 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 14 Oct 2012 18:31:43 +0800 Subject: [PATCH 146/155] Update build.sh for JellyBean --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index d41426e7..69b27d68 100755 --- a/build.sh +++ b/build.sh @@ -1,10 +1,10 @@ #!/bin/sh -export KERNELBASEDIR=$PWD/../ICS_Kernel_update-zip-files +export KERNELBASEDIR=$PWD/../JB_Kernel_update-zip-files #export TOOLCHAIN=$HOME/CodeSourcery/Sourcery_G++_Lite/bin/arm-none-eabi- export TOOLCHAIN=$HOME/arm-2010q1/bin/arm-none-eabi- -export KERNEL_FILE=HTCLEO-Kernel_2.6.32-ics_tytung_HWA +export KERNEL_FILE=HTCLEO-Kernel_2.6.32_tytung_jellybean rm arch/arm/boot/zImage make htcleo_defconfig From f3e70adb0411ac8b296881cdd61a6b4ef16d7821 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 14 Oct 2012 20:28:28 +0800 Subject: [PATCH 147/155] htcleo: reduce the pmem_adsp size --- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 1a4cae6b..4b61ff8f 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -43,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_SIZE 0x02200000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 From 2543feb2ce7c738bf4bee4b7caf532e0912c9823 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 14 Oct 2012 20:29:01 +0800 Subject: [PATCH 148/155] htcleo: updated htcleo_defconfig to tytung_jellybean_r1' --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 9b39d2b8..0d069470 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sun Sep 23 17:26:43 CST 2012 +# Sun Oct 14 20:28:45 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_HWA_r3.3_JB" +CONFIG_LOCALVERSION="_tytung_jellybean_r1" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From 783883f2fef1804ff1572b2fd0e073a8a24b0f0b Mon Sep 17 
00:00:00 2001 From: tytung Date: Sun, 11 Nov 2012 13:24:22 +0800 Subject: [PATCH 149/155] htcleo: restore the PMEM_ADSP size to the default value as the HW video decoder works on HD2 now. --- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 4b61ff8f..1a4cae6b 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -43,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02200000 +#define MSM_PMEM_ADSP_SIZE 0x02900000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 From bd5ec6a5e7a3797c25c33635f965d1d6e4f97f84 Mon Sep 17 00:00:00 2001 From: tytung Date: Sun, 11 Nov 2012 13:26:46 +0800 Subject: [PATCH 150/155] htcleo: updated htcleo_defconfig to tytung_jellybean_r2 --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 0d069470..064df97d 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sun Oct 14 20:28:45 CST 2012 +# Sun Nov 11 13:26:11 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_jellybean_r1" +CONFIG_LOCALVERSION="_tytung_jellybean_r2" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From a0b0a0161981da304e17aecddae4c474007f6beb Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 17 Nov 2012 01:10:09 +0800 Subject: [PATCH 151/155] Staging: android: binder: Fix memory leak on thread/process exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a thread or process exited while a reply, one-way transaction or death notification was pending, the struct holding the pending work was leaked. 
Signed-off-by: Arve HjønnevÃ¥g Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/binder.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) mode change 100755 => 100644 drivers/staging/android/binder.c diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c old mode 100755 new mode 100644 index bd8ac9fd..ffb07117 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2529,14 +2529,38 @@ static void binder_release_work(struct list_head *list) struct binder_transaction *t; t = container_of(w, struct binder_transaction, work); - if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) + if (t->buffer->target_node && + !(t->flags & TF_ONE_WAY)) { binder_send_failed_reply(t, BR_DEAD_REPLY); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered transaction %d\n", + t->debug_id); + t->buffer->transaction = NULL; + kfree(t); + binder_stats_deleted(BINDER_STAT_TRANSACTION); + } } break; case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered TRANSACTION_COMPLETE\n"); kfree(w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; + case BINDER_WORK_DEAD_BINDER_AND_CLEAR: + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { + struct binder_ref_death *death; + + death = container_of(w, struct binder_ref_death, work); + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered death notification, %p\n", + death->cookie); + kfree(death); + binder_stats_deleted(BINDER_STAT_DEATH); + } break; default: + pr_err("binder: unexpected work type, %d, not freed\n", + w->type); break; } } @@ -3013,6 +3037,7 @@ static void binder_deferred_release(struct binder_proc *proc) nodes++; rb_erase(&node->rb_node, &proc->nodes); list_del_init(&node->work.entry); + binder_release_work(&node->async_todo); if (hlist_empty(&node->refs)) { kfree(node); binder_stats_deleted(BINDER_STAT_NODE); @@ -3051,6 +3076,7 @@ static void binder_deferred_release(struct binder_proc *proc) binder_delete_ref(ref); } binder_release_work(&proc->todo); + binder_release_work(&proc->delivered_death); buffers = 0; while ((n = rb_first(&proc->allocated_buffers))) { From bba549a7dc5a1edd99fa5f329e843177e51a4941 Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 17 Nov 2012 01:13:40 +0800 Subject: [PATCH 152/155] Staging: android: binder: Allow using highmem for binder buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default kernel mapping for the pages allocated for the binder buffers is never used. Set the __GFP_HIGHMEM flag when allocating these pages so we don't needlessly use low memory pages that may be required elsewhere. 
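To see why __GFP_HIGHMEM is safe here: a highmem page has no permanent kernel mapping, so the kernel creates a short-lived mapping (kmap/kmap_atomic) only when it actually needs to touch the page, while mapping the page into userspace is unaffected. A hedged sketch of the general pattern, not the binder code itself:

#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>

/* Allocate a zeroed page from any zone, including highmem, and map it
 * into kernel space only for the moment it must be initialized. */
static struct page *alloc_buffer_page(void)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
	void *kaddr;

	if (!page)
		return NULL;

	kaddr = kmap(page);		/* temporary kernel mapping */
	/* ... fill in any header or metadata here ... */
	kunmap(page);			/* drop it; a later userspace mapping
					 * of the page does not need it */
	return page;
}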
Signed-off-by: Arve HjønnevÃ¥g Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index ffb07117..77dd3e04 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -656,7 +656,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; BUG_ON(*page); - *page = alloc_page(GFP_KERNEL | __GFP_ZERO); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (*page == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: %d: binder_alloc_buf failed " From 9031c37be6939e792da14051857829fbd78eedca Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 17 Nov 2012 15:28:02 +0800 Subject: [PATCH 153/155] htcleo: pm: add HD2 off-mode Alarm Clock for cLK (Credit goes to kokotas and Rick_1995) Visit http://forum.xda-developers.com/showthread.php?t=1990111 for more info. --- arch/arm/mach-msm/pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-msm/pm.c b/arch/arm/mach-msm/pm.c index ed19e679..3155ec3f 100644 --- a/arch/arm/mach-msm/pm.c +++ b/arch/arm/mach-msm/pm.c @@ -777,6 +777,9 @@ static int msm_reboot_call(struct notifier_block *this, unsigned long code, void } else if (!strncmp(cmd, "oem-", 4)) { unsigned code = simple_strtoul(cmd + 4, 0, 16) & 0xff; restart_reason = 0x6f656d00 | code; + } else if (!strncmp(cmd, "S", 1)) { + unsigned code = simple_strtoul(cmd + 1, 0, 16) & 0x00ffffff; + restart_reason = 0x53000000 | code; } else if (!strcmp(cmd, "force-hard")) { restart_reason = 0x776655AA; } else { From 139f6ae5207c900a4eb38b06f7f1acdfa4e2c7f0 Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 17 Nov 2012 21:13:29 +0800 Subject: [PATCH 154/155] Revert "htcleo: updated htcleo_defconfig to tytung_jellybean_r2" This reverts commit bd5ec6a5e7a3797c25c33635f965d1d6e4f97f84. --- arch/arm/configs/htcleo_defconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index 064df97d..0d069470 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sun Nov 11 13:26:11 CST 2012 +# Sun Oct 14 20:28:45 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -32,7 +32,7 @@ CONFIG_EXPERIMENTAL=y CONFIG_BROKEN_ON_SMP=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_LOCALVERSION="_tytung_jellybean_r2" +CONFIG_LOCALVERSION="_tytung_jellybean_r1" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y From a6067c0e2d0c408fd807a8cb52f4a1332a822802 Mon Sep 17 00:00:00 2001 From: tytung Date: Sat, 17 Nov 2012 21:14:15 +0800 Subject: [PATCH 155/155] Revert "htcleo: restore the PMEM_ADSP size to the default value as the HW video decoder works on HD2 now." This reverts commit 783883f2fef1804ff1572b2fd0e073a8a24b0f0b. 
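Side note on the pm.c hunk in PATCH 153 above: the new "S" command is how userspace hands an off-mode alarm value to cLK; the kernel masks the hex payload to 24 bits and stores 0x53000000 | value as the restart reason. A rough usage sketch; the payload below is a made-up example value, not something defined by this series:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>

int main(void)
{
	/* LINUX_REBOOT_CMD_RESTART2 passes the string to the kernel's
	 * reboot notifiers, where msm_reboot_call() parses it. */
	return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		       LINUX_REBOOT_CMD_RESTART2, "S0001e240");
}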
--- arch/arm/mach-msm/board-htcleo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h index 1a4cae6b..4b61ff8f 100755 --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -43,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_SIZE 0x02200000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000
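For reference, the MSM_PMEM_ADSP_SIZE values this series keeps switching between are easier to compare in MiB; a trivial userspace check of the arithmetic:

#include <stdio.h>

int main(void)
{
	/* The three ADSP pmem sizes used across these patches. */
	unsigned long sizes[] = { 0x01800000, 0x02200000, 0x02900000 };
	int i;

	for (i = 0; i < 3; i++)
		printf("0x%08lx = %lu MiB\n", sizes[i], sizes[i] >> 20);
	return 0;	/* prints 24, 34 and 41 MiB respectively */
}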