diff --git a/Makefile b/Makefile
index 958cbb0f..acde5126 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,6 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 32
-EXTRAVERSION = -ics
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
diff --git a/README b/README
index 5d014817..35d18fb0 100644
--- a/README
+++ b/README
@@ -20,19 +20,18 @@ Primary features:
 - Two-way call recording (Credits: avs333, snq-, and tytung)
 - T-Mobile Wi-Fi Calling (Credits: tytung)
 - Wi-Fi IEEE 802.1x/EAP authentication (Credits: tytung)
-- Native USB Tethering (for Gingerbread) (Credits: tytung)
+- Native USB Tethering (Credits: tytung)
 - Native Wi-Fi Tethering (Credits: tytung)
-- Real Wi-Fi MAC address (only for SD build on WinMo 6.5) (Credits: savan and tytung)
-- Unique Wi-Fi MAC address (for MAGLDR and cLK) (Credits: markinus)
-- Unique Bluetooth MAC address (Credits: markinus and tytung)
 - Official HTC extended battery support (HTC EB 2300mAh) (Credits: arne)
 - ALSA sound driver as kernel modules (alsa-pcm-htc-leo.ko and alsa-mix-htc-leo.ko) (Credits: cotulla)
 - Wired headphones support for ICS. (Credits: zivan56)
 - Backported xt_qtaguid and xt_quota2 to support data usage for ICS. (Credits: tytung)
 - Improved Flashlight compatibility for ICS. (Credits: tytung)
 - Backported the GPU driver to enable the Hardware Acceleration for ICS. (Credits: Securecrt and Rick_1995)
+- Updated to msm-kgsl3d0 v3.8 to match the latest QCOM Adreno200 drivers for ICS. (Credits: Rick_1995)
+- Real WiFi and Bluetooth MAC addresses. (Credits: Franck78, Rick_1995 and Marc1706)
 
-Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, other devs, and testers.
+Credits: Cotulla, Markinus, Hastarin, TYTung, Letama, Rajko, Dan1j3l, Cedesmith, Arne, Trilu, Charansingh, Mdebeljuh, Jdivic, Avs333, Snq-, Savan, Drizztje, Marc1706, Zivan56, Securecrt, Rick_1995, Franck78, other devs, and testers.
=============================================================================== diff --git a/arch/arm/Makefile b/arch/arm/Makefile index a73caaf6..f35b1588 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -17,7 +17,7 @@ endif OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -#KBUILD_CFLAGS +=-pipe +KBUILD_CFLAGS +=-pipe # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) diff --git a/arch/arm/configs/htcleo_defconfig b/arch/arm/configs/htcleo_defconfig index a567f969..220270c5 100644 --- a/arch/arm/configs/htcleo_defconfig +++ b/arch/arm/configs/htcleo_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.32-ics -# Sat May 12 16:06:22 CST 2012 +# Sun Oct 14 20:28:45 CST 2012 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -604,11 +604,10 @@ CONFIG_NETFILTER_XT_CONNMARK=y # CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y CONFIG_NETFILTER_XT_TARGET_CONNMARK=y -# CONFIG_NETFILTER_XT_TARGET_CT is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set CONFIG_NETFILTER_XT_TARGET_MARK=y -# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set +CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y # CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set @@ -632,7 +631,7 @@ CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_ESP is not set CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y CONFIG_NETFILTER_XT_MATCH_HELPER=y -CONFIG_NETFILTER_XT_MATCH_HL=y +# CONFIG_NETFILTER_XT_MATCH_HL is not set CONFIG_NETFILTER_XT_MATCH_IPRANGE=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MATCH_LIMIT=y @@ -709,8 +708,21 @@ CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK_IPV6=y # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set -# CONFIG_BRIDGE_NF_EBTABLES is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +# CONFIG_IP6_NF_MATCH_FRAG is not set +# CONFIG_IP6_NF_MATCH_OPTS is not set +# CONFIG_IP6_NF_MATCH_HL is not set +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RT is not set +# CONFIG_IP6_NF_TARGET_HL is not set +CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set @@ -1681,10 +1693,14 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y # CONFIG_XVMALLOC=y CONFIG_ZRAM=y -CONFIG_ZRAM_NUM_DEVICES=3 +CONFIG_ZRAM_NUM_DEVICES=1 CONFIG_ZRAM_DEFAULT_PERCENTAGE=18 # CONFIG_ZRAM_DEBUG is not set CONFIG_ZRAM_DEFAULT_DISKSIZE=100000000 +# CONFIG_ZRAM_LZO is not set +CONFIG_ZRAM_SNAPPY=y +CONFIG_SNAPPY_COMPRESS=y +CONFIG_SNAPPY_DECOMPRESS=y # # File systems diff --git a/arch/arm/mach-msm/board-htcleo.c b/arch/arm/mach-msm/board-htcleo.c old mode 100644 new mode 100755 index 948af5cd..4913f043 --- a/arch/arm/mach-msm/board-htcleo.c +++ b/arch/arm/mach-msm/board-htcleo.c @@ -755,7 +755,7 @@ static struct android_pmem_platform_data mdp_pmem_pdata = { .start = MSM_PMEM_MDP_BASE, .size = MSM_PMEM_MDP_SIZE, #ifdef CONFIG_MSM_KGSL - .allocator_type = PMEM_ALLOCATORTYPE_BITMAP, + .allocator_type = PMEM_ALLOCATORTYPE_ALLORNOTHING, #else .no_allocator = 0, #endif @@ -771,7 +771,7 @@ static struct android_pmem_platform_data android_pmem_adsp_pdata = { #else .no_allocator = 0, 
#endif - .cached = 1, + .cached = 0, }; @@ -784,7 +784,7 @@ static struct android_pmem_platform_data android_pmem_venc_pdata = { #else .no_allocator = 0, #endif - .cached = 1, + .cached = 0, }; static struct platform_device android_pmem_mdp_device = { diff --git a/arch/arm/mach-msm/board-htcleo.h b/arch/arm/mach-msm/board-htcleo.h old mode 100644 new mode 100755 index daf3db03..4b61ff8f --- a/arch/arm/mach-msm/board-htcleo.h +++ b/arch/arm/mach-msm/board-htcleo.h @@ -26,6 +26,7 @@ #define MSM_EBI1_BANK0_SIZE 0x1E7C0000 /* 488MB - 0x00040000 RAM CONSOLE*/ #endif + /* Don't change that */ #define MSM_SMI_BASE 0x00000000 #define MSM_SMI_SIZE 0x04000000 @@ -42,7 +43,7 @@ #define MSM_PMEM_MDP_SIZE 0x02000000 #define MSM_PMEM_ADSP_BASE 0x3D700000 -#define MSM_PMEM_ADSP_SIZE 0x02900000 +#define MSM_PMEM_ADSP_SIZE 0x02200000 #define MSM_GPU_PHYS_BASE (MSM_PMEM_SMI_BASE + MSM_FB_SIZE) #define MSM_GPU_PHYS_SIZE 0x00800000 diff --git a/arch/arm/mach-msm/include/mach/ion.h b/arch/arm/mach-msm/include/mach/ion.h new file mode 100755 index 00000000..4d12249d --- /dev/null +++ b/arch/arm/mach-msm/include/mach/ion.h @@ -0,0 +1,23 @@ +/** + * + * Copyright (c) 2011, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MACH_ION_H_ +#define __MACH_ION_H_ + +enum ion_memory_types { + ION_EBI_TYPE, + ION_SMI_TYPE, +}; + +#endif diff --git a/arch/arm/mach-msm/pm.c b/arch/arm/mach-msm/pm.c index ed19e679..3155ec3f 100644 --- a/arch/arm/mach-msm/pm.c +++ b/arch/arm/mach-msm/pm.c @@ -777,6 +777,9 @@ static int msm_reboot_call(struct notifier_block *this, unsigned long code, void } else if (!strncmp(cmd, "oem-", 4)) { unsigned code = simple_strtoul(cmd + 4, 0, 16) & 0xff; restart_reason = 0x6f656d00 | code; + } else if (!strncmp(cmd, "S", 1)) { + unsigned code = simple_strtoul(cmd + 1, 0, 16) & 0x00ffffff; + restart_reason = 0x53000000 | code; } else if (!strcmp(cmd, "force-hard")) { restart_reason = 0x776655AA; } else { diff --git a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c old mode 100644 new mode 100755 index 69ce1804..b6162d89 --- a/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c +++ b/arch/arm/mach-msm/qdsp6_1550/msm_q6vdec.c @@ -61,7 +61,7 @@ #define VDEC_GET_MAJOR_VERSION(version) (((version)&MAJOR_MASK)>>16) #define VDEC_GET_MINOR_VERSION(version) ((version)&MINOR_MASK) - +//#define DEBUG_TRACE_VDEC #ifdef DEBUG_TRACE_VDEC #define TRACE(fmt,x...) \ do { pr_debug("%s:%d " fmt, __func__, __LINE__, ##x); } while (0) @@ -69,6 +69,8 @@ #define TRACE(fmt,x...) do { } while (0) #endif +/* the version check will cause vdec hang up!!! */ +#define VERSION_CHECK 0 static DEFINE_MUTEX(idlecount_lock); static int idlecount; @@ -696,7 +698,7 @@ static long vdec_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; default: - pr_err("%s: invalid ioctl!\n", __func__); + pr_err("%s: invalid ioctl! 
cmd= %08x \n", __func__,cmd); ret = -EINVAL; break; } @@ -799,8 +801,9 @@ static int vdec_open(struct inode *inode, struct file *file) int i; struct vdec_msg_list *l; struct vdec_data *vd; +#if VERSION_CHECK struct dal_info version_info; - +#endif pr_info("q6vdec_open()\n"); mutex_lock(&vdec_ref_lock); if (ref_cnt >= MAX_SUPPORTED_INSTANCES) { @@ -845,6 +848,7 @@ static int vdec_open(struct inode *inode, struct file *file) ret = -EIO; goto vdec_open_err_handle_list; } +#if VERSION_CHECK ret = dal_call_f9(vd->vdec_handle, DAL_OP_INFO, &version_info, sizeof(struct dal_info)); @@ -859,12 +863,15 @@ static int vdec_open(struct inode *inode, struct file *file) pr_err("%s: driver version mismatch !\n", __func__); goto vdec_open_err_handle_version; } - +#endif vd->running = 1; prevent_sleep(); return 0; + +#if VERSION_CHECK vdec_open_err_handle_version: dal_detach(vd->vdec_handle); +#endif vdec_open_err_handle_list: { struct vdec_msg_list *l, *n; diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca..d5873c6c 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -17,10 +17,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int read_expire = HZ / 4; /* max time before a read is submitted. */ static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one +static const int writes_starved = 4; /* max times reads can starve a write */ +static const int fifo_batch = 1; /* # of sequential requests treated as one by the above parameters. For throughput. */ struct deadline_data { @@ -362,7 +362,7 @@ static void *deadline_init_queue(struct request_queue *q) dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; - dd->front_merges = 1; + dd->front_merges = 0; dd->fifo_batch = fifo_batch; return dd; } diff --git a/build.sh b/build.sh new file mode 100755 index 00000000..69b27d68 --- /dev/null +++ b/build.sh @@ -0,0 +1,36 @@ +#!/bin/sh + +export KERNELBASEDIR=$PWD/../JB_Kernel_update-zip-files +#export TOOLCHAIN=$HOME/CodeSourcery/Sourcery_G++_Lite/bin/arm-none-eabi- +export TOOLCHAIN=$HOME/arm-2010q1/bin/arm-none-eabi- + +export KERNEL_FILE=HTCLEO-Kernel_2.6.32_tytung_jellybean + +rm arch/arm/boot/zImage +make htcleo_defconfig +make ARCH=arm CROSS_COMPILE=$TOOLCHAIN zImage -j8 && make ARCH=arm CROSS_COMPILE=$TOOLCHAIN modules -j8 + +if [ -f arch/arm/boot/zImage ]; then + +mkdir -p $KERNELBASEDIR/ +rm -rf $KERNELBASEDIR/boot/* +rm -rf $KERNELBASEDIR/system/lib/modules/* +mkdir -p $KERNELBASEDIR/boot +mkdir -p $KERNELBASEDIR/system/ +mkdir -p $KERNELBASEDIR/system/lib/ +mkdir -p $KERNELBASEDIR/system/lib/modules + +cp -a arch/arm/boot/zImage $KERNELBASEDIR/boot/zImage + +make ARCH=arm CROSS_COMPILE=$TOOLCHAIN INSTALL_MOD_PATH=$KERNELBASEDIR/system/lib/modules modules_install -j8 + +cd $KERNELBASEDIR/system/lib/modules +find -iname *.ko | xargs -i -t cp {} . +rm -rf $KERNELBASEDIR/system/lib/modules/lib +stat $KERNELBASEDIR/boot/zImage +cd ../../../ +zip -r ${KERNEL_FILE}_`date +"%Y%m%d_%H_%M"`.zip boot system META-INF work +else +echo "Kernel STUCK in BUILD! 
no zImage exist" +fi + diff --git a/drivers/base/genlock.c b/drivers/base/genlock.c index afe8eb1c..8c064888 100644 --- a/drivers/base/genlock.c +++ b/drivers/base/genlock.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,7 +22,7 @@ #include #include #include -#include /* for in_interrupt() */ +#include /* Lock states - can either be unlocked, held as an exclusive write lock or a * shared read lock @@ -32,7 +32,18 @@ #define _RDLOCK GENLOCK_RDLOCK #define _WRLOCK GENLOCK_WRLOCK +#define GENLOCK_LOG_ERR(fmt, args...) \ +pr_err("genlock: %s: " fmt, __func__, ##args) + +/* The genlock magic stored in the kernel private data is used to protect + * against the possibility of user space passing a valid fd to a + * non-genlock file for genlock_attach_lock() + */ +#define GENLOCK_MAGIC_OK 0xD2EAD10C +#define GENLOCK_MAGIC_BAD 0xD2EADBAD + struct genlock { + unsigned int magic; /* Magic for attach verification */ struct list_head active; /* List of handles holding lock */ spinlock_t lock; /* Spinlock to protect the lock internals */ wait_queue_head_t queue; /* Holding pen for processes pending lock */ @@ -49,12 +60,28 @@ struct genlock_handle { taken */ }; +/* + * Create a spinlock to protect against a race condition when a lock gets + * released while another process tries to attach it + */ + +static DEFINE_SPINLOCK(genlock_ref_lock); + static void genlock_destroy(struct kref *kref) { - struct genlock *lock = container_of(kref, struct genlock, - refcount); + struct genlock *lock = container_of(kref, struct genlock, + refcount); - kfree(lock); + /* + * Clear the private data for the file descriptor in case the fd is + * still active after the lock gets released + */ + + if (lock->file) + lock->file->private_data = NULL; + lock->magic = GENLOCK_MAGIC_BAD; + + kfree(lock); } /* @@ -64,6 +91,15 @@ static void genlock_destroy(struct kref *kref) static int genlock_release(struct inode *inodep, struct file *file) { + struct genlock *lock = file->private_data; + /* + * Clear the refrence back to this file structure to avoid + * somehow reusing the lock after the file has been destroyed + */ + + if (lock) + lock->file = NULL; + return 0; } @@ -81,18 +117,29 @@ static const struct file_operations genlock_fops = { struct genlock *genlock_create_lock(struct genlock_handle *handle) { struct genlock *lock; + void *ret; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } lock = kzalloc(sizeof(*lock), GFP_KERNEL); - if (lock == NULL) + if (lock == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for a lock\n"); return ERR_PTR(-ENOMEM); + } INIT_LIST_HEAD(&lock->active); init_waitqueue_head(&lock->queue); spin_lock_init(&lock->lock); + lock->magic = GENLOCK_MAGIC_OK; lock->state = _UNLOCKED; /* @@ -100,8 +147,13 @@ struct genlock *genlock_create_lock(struct genlock_handle *handle) * other processes */ - lock->file = anon_inode_getfile("genlock", &genlock_fops, - lock, O_RDWR); + ret = anon_inode_getfile("genlock", &genlock_fops, lock, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create lock inode\n"); + kfree(lock); + return ret; + } + lock->file = 
ret; /* Attach the new lock to the handle */ handle->lock = lock; @@ -120,8 +172,10 @@ static int genlock_get_fd(struct genlock *lock) { int ret; - if (!lock->file) + if (!lock->file) { + GENLOCK_LOG_ERR("No file attached to the lock\n"); return -EINVAL; + } ret = get_unused_fd_flags(0); if (ret < 0) @@ -143,24 +197,51 @@ struct genlock *genlock_attach_lock(struct genlock_handle *handle, int fd) struct file *file; struct genlock *lock; - if (handle->lock != NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return ERR_PTR(-EINVAL); + } + + if (handle->lock != NULL) { + GENLOCK_LOG_ERR("Handle already has a lock attached\n"); + return ERR_PTR(-EINVAL); + } file = fget(fd); - if (file == NULL) + if (file == NULL) { + GENLOCK_LOG_ERR("Bad file descriptor\n"); return ERR_PTR(-EBADF); + } + /* + * take a spinlock to avoid a race condition if the lock is + * released and then attached + */ + + spin_lock(&genlock_ref_lock); lock = file->private_data; fput(file); - if (lock == NULL) - return ERR_PTR(-EINVAL); + if (lock == NULL) { + GENLOCK_LOG_ERR("File descriptor is invalid\n"); + goto fail_invalid; + } + + if (lock->magic != GENLOCK_MAGIC_OK) { + GENLOCK_LOG_ERR("Magic is invalid - 0x%X\n", lock->magic); + goto fail_invalid; + } handle->lock = lock; kref_get(&lock->refcount); + spin_unlock(&genlock_ref_lock); return lock; + +fail_invalid: + spin_unlock(&genlock_ref_lock); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL(genlock_attach_lock); @@ -199,13 +280,16 @@ static int _genlock_unlock(struct genlock *lock, struct genlock_handle *handle) spin_lock_irqsave(&lock->lock, irqflags); - if (lock->state == _UNLOCKED) + if (lock->state == _UNLOCKED) { + GENLOCK_LOG_ERR("Trying to unlock an unlocked handle\n"); goto done; + } /* Make sure this handle is an owner of the lock */ - if (!handle_has_lock(lock, handle)) + if (!handle_has_lock(lock, handle)) { + GENLOCK_LOG_ERR("handle does not have lock attached to it\n"); goto done; - + } /* If the handle holds no more references to the lock then release it (maybe) */ @@ -228,7 +312,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, { unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); spin_lock_irqsave(&lock->lock, irqflags); @@ -247,12 +331,15 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, if (handle_has_lock(lock, handle)) { /* - * If the handle already holds the lock and the type matches, - * then just increment the active pointer. This allows the - * handle to do recursive locks + * If the handle already holds the lock and the lock type is + * a read lock then just increment the active pointer. This + * allows the handle to do recursive read locks. Recursive + * write locks are not allowed in order to support + * synchronization within a process using a single gralloc + * handle. */ - if (lock->state == op) { + if (lock->state == _RDLOCK && op == _RDLOCK) { handle->active++; goto done; } @@ -261,32 +348,46 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, * If the handle holds a write lock then the owner can switch * to a read lock if they want. Do the transition atomically * then wake up any pending waiters in case they want a read - * lock too. + * lock too. In order to support synchronization within a + * process the caller must explicity request to convert the + * lock type with the GENLOCK_WRITE_TO_READ flag. 
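The write-to-read conversion rule described in the comment above is easiest to see from the caller's side. Below is a minimal in-kernel usage sketch, not part of the patch itself, built only from the symbols this file exports (genlock_get_handle, genlock_create_lock, genlock_dreadlock and the GENLOCK_* operations); the header path, the omitted teardown, and the 100 ms timeouts are illustrative assumptions.

/*
 * Minimal sketch of an in-kernel genlock user (illustrative only).
 * Header location is assumed; error unwinding and handle teardown
 * are omitted for brevity.
 */
#include <linux/err.h>
#include <linux/genlock.h>	/* assumed location of the genlock API */

static int genlock_usage_sketch(void)
{
	struct genlock_handle *handle;
	struct genlock *lock;
	int ret;

	handle = genlock_get_handle();
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	lock = genlock_create_lock(handle);
	if (IS_ERR(lock))
		return PTR_ERR(lock);

	/* Take the write lock, waiting up to 100 ms for it to come free */
	ret = genlock_dreadlock(handle, GENLOCK_WRLOCK, 0, 100);
	if (ret)
		return ret;

	/*
	 * Downgrade write -> read: with this patch the caller must ask
	 * for the conversion explicitly via GENLOCK_WRITE_TO_READ.
	 */
	ret = genlock_dreadlock(handle, GENLOCK_RDLOCK,
				GENLOCK_WRITE_TO_READ, 100);
	if (ret)
		return ret;

	/* Drop the lock; the handle itself lives until its file is destroyed */
	return genlock_dreadlock(handle, GENLOCK_UNLOCK, 0, 0);
}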
*/ - if (op == _RDLOCK && handle->active == 1) { - lock->state = _RDLOCK; - wake_up(&lock->queue); + if (flags & GENLOCK_WRITE_TO_READ) { + if (lock->state == _WRLOCK && op == _RDLOCK) { + lock->state = _RDLOCK; + wake_up(&lock->queue); + goto done; + } else { + GENLOCK_LOG_ERR("Invalid state to convert" + "write to read\n"); + ret = -EINVAL; + goto done; + } + } + } else { + + /* + * Check to ensure the caller has not attempted to convert a + * write to a read without holding the lock. + */ + + if (flags & GENLOCK_WRITE_TO_READ) { + GENLOCK_LOG_ERR("Handle must have lock to convert" + "write to read\n"); + ret = -EINVAL; goto done; } /* - * Otherwise the user tried to turn a read into a write, and we - * don't allow that. + * If we request a read and the lock is held by a read, then go + * ahead and share the lock */ - ret = -EINVAL; - goto done; + if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) + goto dolock; } - /* - * If we request a read and the lock is held by a read, then go - * ahead and share the lock - */ - - if (op == GENLOCK_RDLOCK && lock->state == _RDLOCK) - goto dolock; - /* Treat timeout 0 just like a NOBLOCK flag and return if the lock cannot be aquired without blocking */ @@ -295,15 +396,26 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - /* Wait while the lock remains in an incompatible state */ + /* + * Wait while the lock remains in an incompatible state + * state op wait + * ------------------- + * unlocked n/a no + * read read no + * read write yes + * write n/a yes + */ - while (lock->state != _UNLOCKED) { - unsigned int elapsed; + while ((lock->state == _RDLOCK && op == _WRLOCK) || + lock->state == _WRLOCK) { + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); elapsed = wait_event_interruptible_timeout(lock->queue, - lock->state == _UNLOCKED, ticks); + lock->state == _UNLOCKED || + (lock->state == _RDLOCK && op == _RDLOCK), + ticks); spin_lock_irqsave(&lock->lock, irqflags); @@ -312,7 +424,7 @@ static int _genlock_lock(struct genlock *lock, struct genlock_handle *handle, goto done; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } dolock: @@ -320,7 +432,7 @@ dolock: list_add_tail(&handle->entry, &lock->active); lock->state = op; - handle->active = 1; + handle->active++; done: spin_unlock_irqrestore(&lock->lock, irqflags); @@ -329,7 +441,7 @@ done: } /** - * genlock_lock - Acquire or release a lock + * genlock_lock - Acquire or release a lock (depreciated) * @handle - pointer to the genlock handle that is requesting the lock * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) * @flags - flags to control the operation @@ -341,11 +453,76 @@ done: int genlock_lock(struct genlock_handle *handle, int op, int flags, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; + unsigned long irqflags; + int ret = 0; - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } + + switch (op) { + case GENLOCK_UNLOCK: + ret = _genlock_unlock(lock, handle); + break; + case GENLOCK_RDLOCK: + spin_lock_irqsave(&lock->lock, irqflags); + if (handle_has_lock(lock, handle)) { + /* request the WRITE_TO_READ flag for compatibility */ + flags |= GENLOCK_WRITE_TO_READ; + } + spin_unlock_irqrestore(&lock->lock, irqflags); + /* fall through to take lock */ + case GENLOCK_WRLOCK: + ret = 
_genlock_lock(lock, handle, op, flags, timeout); + break; + default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); + ret = -EINVAL; + break; + } + + return ret; +} +EXPORT_SYMBOL(genlock_lock); + +/** + * genlock_dreadlock - Acquire or release a lock + * @handle - pointer to the genlock handle that is requesting the lock + * @op - the operation to perform (RDLOCK, WRLOCK, UNLOCK) + * @flags - flags to control the operation + * @timeout - optional timeout to wait for the lock to come free + * + * Returns: 0 on success or error code on failure + */ + +int genlock_dreadlock(struct genlock_handle *handle, int op, int flags, + uint32_t timeout) +{ + struct genlock *lock; + + int ret = 0; + + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); + return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } switch (op) { case GENLOCK_UNLOCK: @@ -356,13 +533,14 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, ret = _genlock_lock(lock, handle, op, flags, timeout); break; default: + GENLOCK_LOG_ERR("Invalid lock operation\n"); ret = -EINVAL; break; } return ret; } -EXPORT_SYMBOL(genlock_lock); +EXPORT_SYMBOL(genlock_dreadlock); /** * genlock_wait - Wait for the lock to be released @@ -372,13 +550,22 @@ EXPORT_SYMBOL(genlock_lock); int genlock_wait(struct genlock_handle *handle, uint32_t timeout) { - struct genlock *lock = handle->lock; + struct genlock *lock; unsigned long irqflags; int ret = 0; - unsigned int ticks = msecs_to_jiffies(timeout); + unsigned long ticks = msecs_to_jiffies(timeout); - if (lock == NULL) + if (IS_ERR_OR_NULL(handle)) { + GENLOCK_LOG_ERR("Invalid handle\n"); return -EINVAL; + } + + lock = handle->lock; + + if (lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock attached\n"); + return -EINVAL; + } spin_lock_irqsave(&lock->lock, irqflags); @@ -393,7 +580,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) } while (lock->state != _UNLOCKED) { - unsigned int elapsed; + signed long elapsed; spin_unlock_irqrestore(&lock->lock, irqflags); @@ -407,7 +594,7 @@ int genlock_wait(struct genlock_handle *handle, uint32_t timeout) break; } - ticks = elapsed; + ticks = (unsigned long) elapsed; } done: @@ -415,12 +602,7 @@ done: return ret; } -/** - * genlock_release_lock - Release a lock attached to a handle - * @handle - Pointer to the handle holding the lock - */ - -void genlock_release_lock(struct genlock_handle *handle) +static void genlock_release_lock(struct genlock_handle *handle) { unsigned long flags; @@ -437,11 +619,12 @@ void genlock_release_lock(struct genlock_handle *handle) } spin_unlock_irqrestore(&handle->lock->lock, flags); + spin_lock(&genlock_ref_lock); kref_put(&handle->lock->refcount, genlock_destroy); + spin_unlock(&genlock_ref_lock); handle->lock = NULL; handle->active = 0; } -EXPORT_SYMBOL(genlock_release_lock); /* * Release function called when all references to a handle are released @@ -468,8 +651,10 @@ static const struct file_operations genlock_handle_fops = { static struct genlock_handle *_genlock_get_handle(void) { struct genlock_handle *handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) + if (handle == NULL) { + GENLOCK_LOG_ERR("Unable to allocate memory for the handle\n"); return ERR_PTR(-ENOMEM); + } return handle; } @@ -482,12 +667,19 @@ static struct genlock_handle *_genlock_get_handle(void) struct genlock_handle *genlock_get_handle(void) { + void *ret; struct genlock_handle 
*handle = _genlock_get_handle(); if (IS_ERR(handle)) return handle; - handle->file = anon_inode_getfile("genlock-handle", + ret = anon_inode_getfile("genlock-handle", &genlock_handle_fops, handle, O_RDWR); + if (IS_ERR_OR_NULL(ret)) { + GENLOCK_LOG_ERR("Unable to create handle inode\n"); + kfree(handle); + return ret; + } + handle->file = ret; return handle; } @@ -531,6 +723,9 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, struct genlock *lock; int ret; + if (IS_ERR_OR_NULL(handle)) + return -EINVAL; + switch (cmd) { case GENLOCK_IOC_NEW: { lock = genlock_create_lock(handle); @@ -540,8 +735,11 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return 0; } case GENLOCK_IOC_EXPORT: { - if (handle->lock == NULL) + if (handle->lock == NULL) { + GENLOCK_LOG_ERR("Handle does not have a lock" + "attached\n"); return -EINVAL; + } ret = genlock_get_fd(handle->lock); if (ret < 0) @@ -574,6 +772,14 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_lock(handle, param.op, param.flags, param.timeout); } + case GENLOCK_IOC_DREADLOCK: { + if (copy_from_user(¶m, (void __user *) arg, + sizeof(param))) + return -EFAULT; + + return genlock_dreadlock(handle, param.op, param.flags, + param.timeout); + } case GENLOCK_IOC_WAIT: { if (copy_from_user(¶m, (void __user *) arg, sizeof(param))) @@ -582,10 +788,16 @@ static long genlock_dev_ioctl(struct file *filep, unsigned int cmd, return genlock_wait(handle, param.timeout); } case GENLOCK_IOC_RELEASE: { - genlock_release_lock(handle); - return 0; + /* + * Return error - this ioctl has been deprecated. + * Locks should only be released when the handle is + * destroyed + */ + GENLOCK_LOG_ERR("Deprecated RELEASE ioctl called\n"); + return -EINVAL; } default: + GENLOCK_LOG_ERR("Invalid ioctl\n"); return -EINVAL; } } diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile old mode 100644 new mode 100755 index f49e7164..8ef724e2 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -19,6 +19,7 @@ msm_adreno-y += \ adreno_drawctxt.o \ adreno_postmortem.o \ adreno_a2xx.o \ + adreno_a3xx.o \ adreno.o msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h old mode 100644 new mode 100755 index d859d61c..1c7be3e6 --- a/drivers/gpu/msm/a2xx_reg.h +++ b/drivers/gpu/msm/a2xx_reg.h @@ -140,24 +140,9 @@ union reg_rb_edram_info { struct rb_edram_info_t f; }; -#define RBBM_READ_ERROR_UNUSED0_SIZE 2 -#define RBBM_READ_ERROR_READ_ADDRESS_SIZE 15 -#define RBBM_READ_ERROR_UNUSED1_SIZE 13 -#define RBBM_READ_ERROR_READ_REQUESTER_SIZE 1 -#define RBBM_READ_ERROR_READ_ERROR_SIZE 1 - -struct rbbm_read_error_t { - unsigned int unused0:RBBM_READ_ERROR_UNUSED0_SIZE; - unsigned int read_address:RBBM_READ_ERROR_READ_ADDRESS_SIZE; - unsigned int unused1:RBBM_READ_ERROR_UNUSED1_SIZE; - unsigned int read_requester:RBBM_READ_ERROR_READ_REQUESTER_SIZE; - unsigned int read_error:RBBM_READ_ERROR_READ_ERROR_SIZE; -}; - -union rbbm_read_error_u { - unsigned int val:32; - struct rbbm_read_error_t f; -}; +#define RBBM_READ_ERROR_ADDRESS_MASK 0x0001fffc +#define RBBM_READ_ERROR_REQUESTER (1<<30) +#define RBBM_READ_ERROR_ERROR (1<<31) #define CP_RB_CNTL_RB_BUFSZ_SIZE 6 #define CP_RB_CNTL_UNUSED0_SIZE 2 @@ -278,6 +263,7 @@ union reg_cp_rb_cntl { #define REG_CP_ME_CNTL 0x01F6 #define REG_CP_ME_RAM_DATA 0x01FA #define REG_CP_ME_RAM_WADDR 0x01F8 +#define REG_CP_ME_RAM_RADDR 0x01F9 #define REG_CP_ME_STATUS 0x01F7 #define REG_CP_PFP_UCODE_ADDR 0x00C0 #define 
REG_CP_PFP_UCODE_DATA 0x00C1 diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h new file mode 100755 index 00000000..84c83b8c --- /dev/null +++ b/drivers/gpu/msm/a3xx_reg.h @@ -0,0 +1,453 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3xx_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* Register definitions */ + +#define A3XX_RBBM_HW_VERSION 0x000 +#define A3XX_RBBM_HW_RELEASE 0x001 +#define A3XX_RBBM_HW_CONFIGURATION 0x002 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_AHB_FAULT 0x54D +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_PROTECT_REG_1 0x461 +#define A3XX_CP_PROTECT_REG_2 0x462 +#define A3XX_CP_PROTECT_REG_3 0x463 +#define A3XX_CP_PROTECT_REG_4 0x464 +#define A3XX_CP_PROTECT_REG_5 0x465 +#define A3XX_CP_PROTECT_REG_6 0x466 +#define A3XX_CP_PROTECT_REG_7 0x467 +#define A3XX_CP_PROTECT_REG_8 0x468 +#define A3XX_CP_PROTECT_REG_9 0x469 +#define A3XX_CP_PROTECT_REG_A 0x46A +#define A3XX_CP_PROTECT_REG_B 0x46B +#define A3XX_CP_PROTECT_REG_C 0x46C +#define A3XX_CP_PROTECT_REG_D 0x46D +#define A3XX_CP_PROTECT_REG_E 0x46E 
+#define A3XX_CP_PROTECT_REG_F 0x46F +#define A3XX_CP_SCRATCH_REG2 0x57A +#define A3XX_CP_SCRATCH_REG3 0x57B +#define A3XX_VSC_BIN_SIZE 0xC01 +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_CONFIG_0 0xC06 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_CONFIG_1 0xC09 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_CONFIG_2 0xC0C +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_CONFIG_3 0xC0F +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_CONFIG_4 0xC12 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_CONFIG_5 0xC15 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_CONFIG_6 0xC18 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_CONFIG_7 0xC1B +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_PC_VSTREAM_CONTROL 0x21E4 +#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA +#define A3XX_PC_PRIM_VTX_CNTL 
0x21EC +#define A3XX_PC_RESTART_INDEX 0x21ED +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_CONTROL_0 0x2240 +#define A3XX_VFD_INDEX_MIN 0x2242 +#define A3XX_VFD_FETCH_INSTR_0_0 0x2246 +#define A3XX_VFD_FETCH_INSTR_0_4 0x224E +#define A3XX_VFD_DECODE_INSTR_0 0x2266 +#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E +#define A3XX_VPC_ATTR 0x2280 +#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340 +#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342 +#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343 +#define A3XX_VBIF_FIXED_SORT_EN 0x300C +#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D +#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E + +/* Bit flags for RBBM_CTL */ +#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1) +#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (17 << 1) + +/* Various flags used by the context switch code */ + +#define SP_MULTI 0 +#define SP_BUFFER_MODE 1 +#define SP_TWO_VTX_QUADS 0 +#define SP_PIXEL_BASED 0 +#define SP_R8G8B8A8_UNORM 8 +#define SP_FOUR_PIX_QUADS 1 + +#define HLSQ_DIRECT 0 +#define HLSQ_BLOCK_ID_SP_VS 4 +#define HLSQ_SP_VS_INSTR 0 +#define HLSQ_SP_FS_INSTR 0 +#define HLSQ_BLOCK_ID_SP_FS 6 +#define HLSQ_TWO_PIX_QUADS 0 +#define HLSQ_TWO_VTX_QUADS 0 +#define HLSQ_BLOCK_ID_TP_TEX 2 +#define HLSQ_TP_TEX_SAMPLERS 0 +#define HLSQ_TP_TEX_MEMOBJ 1 +#define HLSQ_BLOCK_ID_TP_MIPMAP 3 +#define HLSQ_TP_MIPMAP_BASE 1 +#define HLSQ_FOUR_PIX_QUADS 1 + +#define RB_FACTOR_ONE 1 +#define RB_BLEND_OP_ADD 0 +#define RB_FACTOR_ZERO 0 +#define RB_DITHER_DISABLE 0 +#define RB_DITHER_ALWAYS 1 +#define RB_FRAG_NEVER 0 +#define RB_ENDIAN_NONE 0 +#define RB_R8G8B8A8_UNORM 8 +#define RB_RESOLVE_PASS 2 +#define RB_CLEAR_MODE_RESOLVE 1 +#define RB_TILINGMODE_LINEAR 0 +#define RB_REF_NEVER 0 +#define RB_STENCIL_KEEP 0 +#define RB_RENDERING_PASS 0 +#define RB_TILINGMODE_32X32 2 + +#define PC_DRAW_TRIANGLES 2 +#define PC_DI_PT_RECTLIST 8 +#define PC_DI_SRC_SEL_AUTO_INDEX 2 +#define PC_DI_INDEX_SIZE_16_BIT 0 +#define PC_DI_IGNORE_VISIBILITY 0 +#define PC_DI_PT_TRILIST 4 +#define PC_DI_SRC_SEL_IMMEDIATE 1 +#define PC_DI_INDEX_SIZE_32_BIT 1 + +#define UCHE_ENTIRE_CACHE 1 +#define UCHE_OP_INVALIDATE 1 + +/* + * The following are bit field shifts within some of the registers defined + * above. 
These are used in the context switch code in conjunction with the + * _SET macro + */ + +#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16 +#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12 +#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21 +#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19 +#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20 +#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17 +#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0 +#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0 +#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0 +#define GRAS_SC_CONTROL_RASTER_MODE 12 +#define GRAS_SC_CONTROL_RENDER_MODE 4 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0 +#define HLSQ_CTRL0REG_CHUNKDISABLE 26 +#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27 +#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6 +#define HLSQ_CTRL0REG_FSTHREADSIZE 4 +#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28 +#define HLSQ_CTRL0REG_RESERVED2 10 +#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29 +#define HLSQ_CTRL0REG_SPSHADERRESTART 9 +#define HLSQ_CTRL0REG_TPFULLUPDATE 30 +#define HLSQ_CTRL1REG_RESERVED1 9 +#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8 +#define HLSQ_CTRL1REG_VSTHREADSIZE 6 +#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26 +#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0 +#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12 +#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24 +#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24 +#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8 +#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5 +#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25 +#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0 +#define PC_DRAW_INITIATOR_PRIM_TYPE 0 +#define PC_DRAW_INITIATOR_SOURCE_SELECT 6 +#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9 +#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B +#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D +#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E +#define RB_COPYCONTROL_COPY_GMEM_BASE 14 +#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4 +#define RB_COPYDESTBASE_COPY_DEST_BASE 4 +#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14 +#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18 +#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2 +#define RB_COPYDESTINFO_COPY_DEST_TILE 0 +#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0 +#define RB_DEPTHCONTROL_Z_TEST_FUNC 4 +#define RB_MODECONTROL_RENDER_MODE 8 +#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15 +#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16 +#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21 +#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24 +#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16 +#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29 +#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5 +#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8 +#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0 +#define RB_MRTBUFBASE_COLOR_BUF_BASE 4 +#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17 +#define RB_MRTBUFINFO_COLOR_FORMAT 0 +#define RB_MRTBUFINFO_COLOR_TILE_MODE 6 +#define RB_MRTCONTROL_COMPONENT_ENABLE 24 +#define RB_MRTCONTROL_DITHER_MODE 12 +#define RB_MRTCONTROL_READ_DEST_ENABLE 3 +#define RB_MRTCONTROL_ROP_CODE 8 +#define RB_MSAACONTROL_MSAA_DISABLE 10 +#define RB_MSAACONTROL_SAMPLE_MASK 16 +#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24 +#define RB_RENDERCONTROL_BIN_WIDTH 4 +#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12 +#define RB_STENCILCONTROL_STENCIL_FAIL 11 +#define RB_STENCILCONTROL_STENCIL_FAIL_BF 23 +#define RB_STENCILCONTROL_STENCIL_FUNC 8 
+#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20 +#define RB_STENCILCONTROL_STENCIL_ZFAIL 17 +#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29 +#define RB_STENCILCONTROL_STENCIL_ZPASS 14 +#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26 +#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10 +#define SP_FSCTRLREG0_FSICACHEINVALID 2 +#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18 +#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1 +#define SP_FSCTRLREG0_FSLENGTH 24 +#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21 +#define SP_FSCTRLREG0_FSTHREADMODE 0 +#define SP_FSCTRLREG0_FSTHREADSIZE 20 +#define SP_FSCTRLREG0_PIXLODENABLE 22 +#define SP_FSCTRLREG1_FSCONSTLENGTH 0 +#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20 +#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24 +#define SP_FSMRTREG_REGID 0 +#define SP_FSOUTREG_PAD0 2 +#define SP_IMAGEOUTPUTREG_MRTFORMAT 0 +#define SP_IMAGEOUTPUTREG_PAD0 6 +#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16 +#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25 +#define SP_SHADERLENGTH_LEN 0 +#define SP_SPCTRLREG_CONSTMODE 18 +#define SP_SPCTRLREG_SLEEPMODE 20 +#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10 +#define SP_VSCTRLREG0_VSICACHEINVALID 2 +#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1 +#define SP_VSCTRLREG0_VSLENGTH 24 +#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21 +#define SP_VSCTRLREG0_VSTHREADMODE 0 +#define SP_VSCTRLREG0_VSTHREADSIZE 20 +#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24 +#define SP_VSOUTREG_COMPMASK0 9 +#define SP_VSPARAMREG_POSREGID 0 +#define SP_VSPARAMREG_PSIZEREGID 8 +#define SP_VSPARAMREG_TOTALVSOUTVAR 20 +#define SP_VSVPCDSTREG_OUTLOC0 0 +#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16 +#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8 +#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0 +#define UCHE_INVALIDATE1REG_OPCODE 0x1C +#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F +#define VFD_BASEADDR_BASEADDR 0 +#define VFD_CTRLREG0_PACKETSIZE 18 +#define VFD_CTRLREG0_STRMDECINSTRCNT 22 +#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27 +#define VFD_CTRLREG0_TOTALATTRTOVS 0 +#define VFD_CTRLREG1_MAXSTORAGE 0 +#define VFD_CTRLREG1_REGID4INST 24 +#define VFD_CTRLREG1_REGID4VTX 16 +#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4 +#define VFD_DECODEINSTRUCTIONS_FORMAT 6 +#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29 +#define VFD_DECODEINSTRUCTIONS_REGID 12 +#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24 +#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30 +#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0 +#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7 +#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0 +#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18 +#define VFD_FETCHINSTRUCTIONS_STEPRATE 24 +#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17 +#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8 +#define VFD_THREADINGTHRESHOLD_RESERVED6 4 +#define VPC_VPCATTR_LMSIZE 28 +#define VPC_VPCATTR_THRHDASSIGN 12 +#define VPC_VPCATTR_TOTALATTR 0 +#define VPC_VPCPACK_NUMFPNONPOSVAR 8 +#define VPC_VPCPACK_NUMNONPOSVSVAR 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT08 0 +#define VPC_VPCVARPSREPLMODE_COMPONENT09 2 +#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4 +#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6 +#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8 +#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10 +#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12 +#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14 +#define VPC_VPCVARPSREPLMODE_COMPONENT10 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT11 18 +#define VPC_VPCVARPSREPLMODE_COMPONENT12 20 +#define VPC_VPCVARPSREPLMODE_COMPONENT13 22 +#define VPC_VPCVARPSREPLMODE_COMPONENT14 24 +#define VPC_VPCVARPSREPLMODE_COMPONENT15 26 +#define VPC_VPCVARPSREPLMODE_COMPONENT16 28 
+#define VPC_VPCVARPSREPLMODE_COMPONENT17 30 + +#endif diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c old mode 100644 new mode 100755 index 61f14a4a..96e9c75c --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -72,6 +72,7 @@ | (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT)) static const struct kgsl_functable adreno_functable; +unsigned int kgsl_cff_dump_enable=0; static struct adreno_device device_3d0 = { .dev = { @@ -120,8 +121,11 @@ static struct adreno_device device_3d0 = { }, .pfp_fw = NULL, .pm4_fw = NULL, + .wait_timeout = 10000, /* in milliseconds */ + .ib_check_level = 0, }; + /* * This is the master list of all GPU cores that are supported by this * driver. @@ -135,50 +139,47 @@ static const struct { const char *pm4fw; const char *pfpfw; struct adreno_gpudev *gpudev; + unsigned int istore_size; + unsigned int pix_shader_start; + unsigned int instruction_size; /* Size of an instruction in dwords */ + unsigned int gmem_size; /* size of gmem for gpu*/ } adreno_gpulist[] = { { ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, + { ADRENO_REV_A203, 0, 1, 1, ANY_ID, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A205, 0, 1, 0, ANY_ID, - "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev }, + "yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_256K }, { ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID, - "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev }, + "leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev, + 512, 384, 3, SZ_512K }, /* * patchlevel 5 (8960v2) needs special pm4 firmware to work around * a hardware problem. 
*/ { ADRENO_REV_A225, 2, 2, 0, 5, - "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, + { ADRENO_REV_A225, 2, 2, 0, 6, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, { ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID, - "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev }, + "a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev, + 1536, 768, 3, SZ_512K }, + /* A3XX doesn't use the pix_shader_start */ + { ADRENO_REV_A305, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2, SZ_256K }, + /* A3XX doesn't use the pix_shader_start */ + { ADRENO_REV_A320, 3, 1, ANY_ID, ANY_ID, + "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, + 512, 0, 2, SZ_512K }, + }; -static void adreno_gmeminit(struct adreno_device *adreno_dev) -{ - struct kgsl_device *device = &adreno_dev->dev; - union reg_rb_edram_info rb_edram_info; - unsigned int gmem_size; - unsigned int edram_value = 0; - - /* make sure edram range is aligned to size */ - BUG_ON(adreno_dev->gmemspace.gpu_base & - (adreno_dev->gmemspace.sizebytes - 1)); - - /* get edram_size value equivalent */ - gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); - while (gmem_size >>= 1) - edram_value++; - - rb_edram_info.val = 0; - - rb_edram_info.f.edram_size = edram_value; - rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ - - /* must be aligned to size */ - rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); - - adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); -} - static irqreturn_t adreno_isr(int irq, void *data) { irqreturn_t result; @@ -268,10 +269,13 @@ static void adreno_setstate(struct kgsl_device *device, int sizedwords = 0; unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ - /* If possible, then set the state via the command stream to avoid - a CPU idle. Otherwise, use the default setstate which uses register - writes */ - if (adreno_dev->drawctxt_active) { + /* + * If possible, then set the state via the command stream to avoid + * a CPU idle. 
Otherwise, use the default setstate which uses register + * writes For CFF dump we must idle and use the registers so that it is + * easier to filter out the mmu accesses from the dump + */ + if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) { if (flags & KGSL_MMUFLAGS_PTUPDATE) { /* wait for graphics pipe to be idle */ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); @@ -419,6 +423,10 @@ adreno_identify_gpu(struct adreno_device *adreno_dev) adreno_dev->gpudev = adreno_gpulist[i].gpudev; adreno_dev->pfp_fwfile = adreno_gpulist[i].pfpfw; adreno_dev->pm4_fwfile = adreno_gpulist[i].pm4fw; + adreno_dev->istore_size = adreno_gpulist[i].istore_size; + adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start; + adreno_dev->instruction_size = adreno_gpulist[i].instruction_size; + adreno_dev->gmemspace.sizebytes = adreno_gpulist[i].gmem_size; } static int __devinit @@ -434,8 +442,6 @@ adreno_probe(struct platform_device *pdev) adreno_dev->wait_timeout = 10000; /* default value in milliseconds */ - init_completion(&device->recovery_gate); - status = adreno_ringbuffer_init(device); if (status != 0) goto error; @@ -480,7 +486,6 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) { int status = -EINVAL; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - int init_reftimestamp = 0x7fffffff; device->state = KGSL_STATE_INIT; device->requested_state = KGSL_STATE_NONE; @@ -508,80 +513,22 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) kgsl_mh_start(device); - if (kgsl_mmu_start(device)) + status = kgsl_mmu_start(device); + if (status) goto error_clk_off; - /*We need to make sure all blocks are powered up and clocked before - *issuing a soft reset. The overrides will then be turned off (set to 0) - */ - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); - - /* Only reset CP block if all blocks have previously been reset */ - if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || - !adreno_is_a22x(adreno_dev)) { - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0xFFFFFFFF); - device->flags |= KGSL_FLAGS_SOFT_RESET; - } else - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000001); - - /* The core is in an indeterminate state until the reset completes - * after 30ms. 
- */ - msleep(30); - - adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); - - adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); - - if (adreno_is_a225(adreno_dev)) { - /* Enable large instruction store for A225 */ - adreno_regwrite(device, REG_SQ_FLOW_CONTROL, 0x18000000); - } - - adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); - adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); - - if (cpu_is_msm8960() || cpu_is_msm8930()) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); - - if (!adreno_is_a22x(adreno_dev)) - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); - else - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); - - kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - - kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - init_reftimestamp); - - adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); - - /* Make sure interrupts are disabled */ - - adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); - adreno_regwrite(device, REG_CP_INT_CNTL, 0); - adreno_regwrite(device, REG_SQ_INT_CNTL, 0); - - if (adreno_is_a22x(adreno_dev)) - adreno_dev->gmemspace.sizebytes = SZ_512K; - else - adreno_dev->gmemspace.sizebytes = SZ_256K; - adreno_gmeminit(adreno_dev); + /* Start the GPU */ + adreno_dev->gpudev->start(adreno_dev); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + device->ftbl->irqctrl(device, 1); status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram); - if (status != 0) - goto error_irq_off; - + if (status == 0) { mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); - return status; + return 0; + } -error_irq_off: kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); kgsl_mmu_stop(device); error_clk_off: @@ -618,12 +565,14 @@ adreno_recover_hang(struct kgsl_device *device) struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; unsigned int timestamp; unsigned int num_rb_contents; - unsigned int bad_context; unsigned int reftimestamp; unsigned int enable_ts; unsigned int soptimestamp; unsigned int eoptimestamp; - struct adreno_context *drawctxt; + unsigned int context_id; + struct kgsl_context *context; + struct adreno_context *adreno_context; + int next = 0; KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n"); rb_buffer = vmalloc(rb->buffer_desc.size); @@ -638,22 +587,35 @@ adreno_recover_hang(struct kgsl_device *device) ret = adreno_ringbuffer_extract(rb, rb_buffer, &num_rb_contents); if (ret) goto done; - timestamp = rb->timestamp; - KGSL_DRV_ERR(device, "Last issued timestamp: %x\n", timestamp); - kgsl_sharedmem_readl(&device->memstore, &bad_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, "Last context unknown id:%d\n", + context_id); + context_id = KGSL_MEMSTORE_GLOBAL; + } + + timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp); + kgsl_sharedmem_readl(&device->memstore, &reftimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); kgsl_sharedmem_readl(&device->memstore, &enable_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable)); kgsl_sharedmem_readl(&device->memstore, &soptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); 
+ KGSL_MEMSTORE_OFFSET(context_id, + soptimestamp)); kgsl_sharedmem_readl(&device->memstore, &eoptimestamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); /* Make sure memory is synchronized before restarting the GPU */ mb(); KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", bad_context); + "Context id that caused a GPU hang: %d\n", context_id); /* restart device */ ret = adreno_stop(device); if (ret) @@ -664,20 +626,20 @@ adreno_recover_hang(struct kgsl_device *device) KGSL_DRV_ERR(device, "Device has been restarted after hang\n"); /* Restore timestamp states */ kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp), eoptimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp), + KGSL_MEMSTORE_OFFSET(context_id, soptimestamp), soptimestamp); if (num_rb_contents) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), + KGSL_MEMSTORE_OFFSET(context_id, ref_wait_ts), reftimestamp); kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable), enable_ts); } /* Make sure all writes are posted before the GPU reads them */ @@ -685,16 +647,34 @@ adreno_recover_hang(struct kgsl_device *device) /* Mark the invalid context so no more commands are accepted from * that context */ - drawctxt = (struct adreno_context *) bad_context; + adreno_context = context->devctxt; KGSL_CTXT_ERR(device, - "Context that caused a GPU hang: %x\n", bad_context); + "Context that caused a GPU hang: %d\n", adreno_context->id); - drawctxt->flags |= CTXT_FLAGS_GPU_HANG; + adreno_context->flags |= CTXT_FLAGS_GPU_HANG; + + /* + * Set the reset status of all contexts to + * INNOCENT_CONTEXT_RESET_EXT except for the bad context + * since thats the guilty party + */ + while ((context = idr_get_next(&device->context_idr, &next))) { + if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT != + context->reset_status) { + if (context->id != context_id) + context->reset_status = + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; + else + context->reset_status = + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; + } + next = next + 1; + } /* Restore valid commands in ringbuffer */ adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents); - rb->timestamp = timestamp; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp; done: vfree(rb_buffer); return ret; @@ -788,7 +768,8 @@ static int adreno_getproperty(struct kgsl_device *device, shadowprop.size = device->memstore.size; /* GSL needs this to be set, even if it appears to be meaningless */ - shadowprop.flags = KGSL_FLAGS_INITIALIZED; + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; } if (copy_to_user(value, &shadowprop, sizeof(shadowprop))) { @@ -834,6 +815,12 @@ static int adreno_getproperty(struct kgsl_device *device, return status; } +static inline void adreno_poke(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr); +} + /* Caller must hold the device mutex. 
*/ int adreno_idle(struct kgsl_device *device, unsigned int timeout) { @@ -842,16 +829,32 @@ int adreno_idle(struct kgsl_device *device, unsigned int timeout) unsigned int rbbm_status; unsigned long wait_timeout = msecs_to_jiffies(adreno_dev->wait_timeout); - unsigned long wait_time = jiffies + wait_timeout; + unsigned long wait_time; + unsigned long wait_time_part; + unsigned int msecs; + unsigned int msecs_first; + unsigned int msecs_part; - kgsl_cffdump_regpoll(device->id, REG_RBBM_STATUS << 2, + kgsl_cffdump_regpoll(device->id, + adreno_dev->gpudev->reg_rbbm_status << 2, 0x00000000, 0x80000000); /* first, wait until the CP has consumed all the commands in * the ring buffer */ retry: if (rb->flags & KGSL_FLAGS_STARTED) { + msecs = adreno_dev->wait_timeout; + msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + wait_time = jiffies + wait_timeout; + wait_time_part = jiffies + msecs_to_jiffies(msecs_first); + adreno_poke(device); do { + if (time_after(jiffies, wait_time_part)) { + adreno_poke(device); + wait_time_part = jiffies + + msecs_to_jiffies(msecs_part); + } GSL_RB_GET_READPTR(rb, &rb->rptr); if (time_after(jiffies, wait_time)) { KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n", @@ -864,7 +867,8 @@ retry: /* now, wait for the GPU to finish its operations */ wait_time = jiffies + wait_timeout; while (time_before(jiffies, wait_time)) { - adreno_regread(device, REG_RBBM_STATUS, &rbbm_status); + adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status, + &rbbm_status); if (rbbm_status == 0x110) return 0; } @@ -918,58 +922,70 @@ static int adreno_suspend_context(struct kgsl_device *device) return status; } -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size) +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size) { - uint8_t *result = NULL; + struct kgsl_memdesc *result = NULL; struct kgsl_mem_entry *entry; - struct kgsl_process_private *priv; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer; + struct kgsl_context *context; + int next = 0; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->buffer_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size)) + return &ringbuffer->buffer_desc; - if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&ringbuffer->memptrs_desc, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr, size)) + return &ringbuffer->memptrs_desc; - if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr)) { - return kgsl_gpuaddr_to_vaddr(&device->memstore, - gpuaddr, size); - } + if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size)) + return &device->memstore; - mutex_lock(&kgsl_driver.process_mutex); - list_for_each_entry(priv, &kgsl_driver.process_list, list) { - if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base)) - continue; - spin_lock(&priv->mem_lock); - entry = kgsl_sharedmem_find_region(priv, gpuaddr, - sizeof(unsigned int)); - if (entry) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); - spin_unlock(&priv->mem_lock); - mutex_unlock(&kgsl_driver.process_mutex); - return result; - } - spin_unlock(&priv->mem_lock); - } - mutex_unlock(&kgsl_driver.process_mutex); + entry = 
kgsl_get_mem_entry(pt_base, gpuaddr, size); - BUG_ON(!mutex_is_locked(&device->mutex)); - list_for_each_entry(entry, &device->memqueue, list) { - if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr)) { - result = kgsl_gpuaddr_to_vaddr(&entry->memdesc, - gpuaddr, size); + if (entry) + return &entry->memdesc; + + while (1) { + struct adreno_context *adreno_context = NULL; + context = idr_get_next(&device->context_idr, &next); + if (context == NULL) break; + + adreno_context = (struct adreno_context *)context->devctxt; + + if (kgsl_mmu_pt_equal(adreno_context->pagetable, pt_base)) { + struct kgsl_memdesc *desc; + + desc = &adreno_context->gpustate; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; } + desc = &adreno_context->context_gmem_shadow.gmemshadow; + if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) { + result = desc; + return result; + } } - return result; + next = next + 1; + } + + return NULL; + +} + +uint8_t *adreno_convertaddr(struct kgsl_device *device, unsigned int pt_base, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_memdesc *memdesc; + + memdesc = adreno_find_region(device, pt_base, gpuaddr, size); + + return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL; } void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, @@ -1009,45 +1025,66 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, __raw_writel(value, reg); } +static unsigned int _get_context_id(struct kgsl_context *k_ctxt) +{ + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + + if (k_ctxt != NULL) { + struct adreno_context *a_ctxt = k_ctxt->devctxt; + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. 
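A short usage sketch for the helper being added here (hypothetical caller, not part of the patch): timestamp readers and waiters hand over whatever kgsl_context they hold and fall back to the ringbuffer-global slot when the context predates per-context timestamps:

	unsigned int id = _get_context_id(context); /* KGSL_MEMSTORE_GLOBAL when context is
						       NULL or lacks CTXT_FLAGS_PER_CONTEXT_TS */
	kgsl_sharedmem_readl(&device->memstore, &timestamp,
			KGSL_MEMSTORE_OFFSET(id, eoptimestamp));

This mirrors what adreno_readtimestamp() does further down.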
+ */ + if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = a_ctxt->id; + } + + return context_id; +} + static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, - unsigned int timestamp) + struct kgsl_context *context, unsigned int timestamp) { int status; unsigned int ref_ts, enableflag; + unsigned int context_id = _get_context_id(context); - status = kgsl_check_timestamp(device, timestamp); + status = kgsl_check_timestamp(device, context, timestamp); if (!status) { mutex_lock(&device->mutex); kgsl_sharedmem_readl(&device->memstore, &enableflag, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)); + KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable)); mb(); if (enableflag) { kgsl_sharedmem_readl(&device->memstore, &ref_ts, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts)); mb(); if (timestamp_cmp(ref_ts, timestamp) >= 0) { kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); wmb(); } } else { unsigned int cmds[2]; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts), - timestamp); + KGSL_MEMSTORE_OFFSET(context_id, + ref_wait_ts), timestamp); enableflag = 1; kgsl_sharedmem_writel(&device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), enableflag); wmb(); /* submit a dummy packet so that even if all * commands upto timestamp get executed we will still * get an interrupt */ cmds[0] = cp_type3_packet(CP_NOP, 1); cmds[1] = 0; - adreno_ringbuffer_issuecmds(device, 0, &cmds[0], 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + &cmds[0], 2); } mutex_unlock(&device->mutex); } @@ -1073,80 +1110,110 @@ static int kgsl_check_interrupt_timestamp(struct kgsl_device *device, /* MUST be called with the device mutex held */ static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { long status = 0; uint io = 1; + static uint io_cnt; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int retries; + unsigned int msecs_first; + unsigned int msecs_part; + unsigned int ts_issued; + unsigned int context_id = _get_context_id(context); + + ts_issued = adreno_dev->ringbuffer.timestamp[context_id]; /* Don't wait forever, set a max value for now */ if (msecs == -1) msecs = adreno_dev->wait_timeout; - if (timestamp_cmp(timestamp, adreno_dev->ringbuffer.timestamp) > 0) { - KGSL_DRV_ERR(device, "Cannot wait for invalid ts: %x, " - "rb->timestamp: %x\n", - timestamp, adreno_dev->ringbuffer.timestamp); + if (timestamp_cmp(timestamp, ts_issued) > 0) { + KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, " + "last issued ts <%d:0x%x>\n", + context_id, timestamp, context_id, ts_issued); status = -EINVAL; goto done; } - if (!kgsl_check_timestamp(device, timestamp)) { - if (pwr->active_pwrlevel) { - int low_pwrlevel = pwr->num_pwrlevels - - KGSL_PWRLEVEL_LOW_OFFSET; - if (pwr->active_pwrlevel == low_pwrlevel) - io = 0; + + /* Keep the first timeout as 100msecs before rewriting + * the WPTR. Less visible impact if the WPTR has not + * been updated properly. + */ + msecs_first = (msecs <= 100) ? 
((msecs + 4) / 5) : 100; + msecs_part = (msecs - msecs_first + 3) / 4; + for (retries = 0; retries < 5; retries++) { + if (kgsl_check_timestamp(device, context, timestamp)) { + /* if the timestamp happens while we're not + * waiting, there's a chance that an interrupt + * will not be generated and thus the timestamp + * work needs to be queued. + */ + queue_work(device->work_queue, &device->ts_expired_ws); + status = 0; + goto done; } + adreno_poke(device); +// the QSD8X50 don't support io_fraction ?? // SecureCRT 2012-06-20 +// io_cnt = (io_cnt + 1) % 100; +// if (io_cnt < +// pwr->pwrlevels[pwr->active_pwrlevel].o_fraction) +// io = 0; mutex_unlock(&device->mutex); - /* We need to make sure that the process is placed in wait-q - * before its condition is called */ + /* We need to make sure that the process is + * placed in wait-q before its condition is called + */ status = kgsl_wait_event_interruptible_timeout( device->wait_queue, kgsl_check_interrupt_timestamp(device, - timestamp), - msecs_to_jiffies(msecs), io); + context, timestamp), + msecs_to_jiffies(retries ? + msecs_part : msecs_first), io); mutex_lock(&device->mutex); - if (status > 0) - status = 0; - else if (status == 0) { - if (!kgsl_check_timestamp(device, timestamp)) { + if (status > 0) { + /*completed before the wait finished */ + status = 0; + goto done; + } else if (status < 0) { + /*an error occurred*/ + goto done; + } + /*this wait timed out*/ + } status = -ETIMEDOUT; KGSL_DRV_ERR(device, - "Device hang detected while waiting " - "for timestamp: %x, last " - "submitted(rb->timestamp): %x, wptr: " - "%x\n", timestamp, - adreno_dev->ringbuffer.timestamp, + "Device hang detected while waiting for timestamp: " + "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, " + "wptr: 0x%x\n", + context_id, timestamp, context_id, ts_issued, adreno_dev->ringbuffer.wptr); if (!adreno_dump_and_recover(device)) { /* wait for idle after recovery as the * timestamp that this process wanted * to wait on may be invalid */ - if (!adreno_idle(device, - KGSL_TIMEOUT_DEFAULT)) - status = 0; - } - } - } + if (!adreno_idle(device, KGSL_TIMEOUT_DEFAULT)) + status = 0; } - done: return (int)status; } static unsigned int adreno_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { unsigned int timestamp = 0; + unsigned int context_id = _get_context_id(context); if (type == KGSL_TIMESTAMP_CONSUMED) adreno_regread(device, REG_CP_TIMESTAMP, ×tamp); else if (type == KGSL_TIMESTAMP_RETIRED) kgsl_sharedmem_readl(&device->memstore, ×tamp, - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)); + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)); rmb(); return timestamp; @@ -1179,7 +1246,7 @@ static long adreno_ioctl(struct kgsl_device_private *dev_priv, default: KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - result = -EINVAL; + result = -ENOIOCTLCMD; break; } return result; @@ -1195,44 +1262,29 @@ static inline s64 adreno_ticks_to_us(u32 ticks, u32 gpu_freq) static void adreno_power_stats(struct kgsl_device *device, struct kgsl_power_stats *stats) { - unsigned int reg; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int cycles; + + /* Get the busy cycles counted since the counter was last reset */ + /* Calling this function also resets and restarts the counter */ + + cycles = adreno_dev->gpudev->busy_cycles(adreno_dev); /* In order to calculate idle you have to have run the algorithm * * at least 
once to get a start time. */ if (pwr->time != 0) { - s64 tmp; - /* Stop the performance moniter and read the current * - * busy cycles. */ - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_FREEZE); - adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, ®); - tmp = ktime_to_us(ktime_get()); + s64 tmp = ktime_to_us(ktime_get()); stats->total_time = tmp - pwr->time; pwr->time = tmp; - stats->busy_time = adreno_ticks_to_us(reg, device->pwrctrl. + stats->busy_time = adreno_ticks_to_us(cycles, device->pwrctrl. pwrlevels[device->pwrctrl.active_pwrlevel]. gpu_freq); - - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | - REG_PERF_STATE_RESET); } else { stats->total_time = 0; stats->busy_time = 0; pwr->time = ktime_to_us(ktime_get()); } - - /* re-enable the performance moniters */ - adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); - adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); - adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); - adreno_regwrite(device, - REG_CP_PERFMON_CNTL, - REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); } void adreno_irqctrl(struct kgsl_device *device, int state) diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h old mode 100644 new mode 100755 index 51ee31a5..929b5f08 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -24,13 +24,37 @@ KGSL_CONTAINER_OF(device, struct adreno_device, dev) /* Flags to control command packet settings */ +#define KGSL_CMD_FLAGS_NONE 0x00000000 #define KGSL_CMD_FLAGS_PMODE 0x00000001 #define KGSL_CMD_FLAGS_NO_TS_CMP 0x00000002 #define KGSL_CMD_FLAGS_NOT_KERNEL_CMD 0x00000004 /* Command identifiers */ -#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0xDEADBEEF -#define KGSL_CMD_IDENTIFIER 0xFEEDFACE +#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define KGSL_CMD_IDENTIFIER 0x2EEDFACE +#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE +#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD + +#ifdef CONFIG_MSM_SCM +#define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) +#else +#define ADRENO_DEFAULT_PWRSCALE_POLICY NULL +#endif + +#define ADRENO_ISTORE_START 0x5000 /* Istore offset */ + +enum adreno_gpurev { + ADRENO_REV_UNKNOWN = 0, + ADRENO_REV_A200 = 200, + ADRENO_REV_A203 = 203, + ADRENO_REV_A205 = 205, + ADRENO_REV_A220 = 220, + ADRENO_REV_A225 = 225, + ADRENO_REV_A305 = 305, + ADRENO_REV_A320 = 320, +}; + +struct adreno_gpudev; #ifdef CONFIG_MSM_SCM #define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz) @@ -64,20 +88,34 @@ struct adreno_device { unsigned int mharb; struct adreno_gpudev *gpudev; unsigned int wait_timeout; + unsigned int istore_size; + unsigned int pix_shader_start; + unsigned int instruction_size; + unsigned int ib_check_level; }; struct adreno_gpudev { - int (*ctxt_gpustate_shadow)(struct adreno_device *, - struct adreno_context *); - int (*ctxt_gmem_shadow)(struct adreno_device *, - struct adreno_context *); + /* + * These registers are in a different location on A3XX, so define + * them in the structure and use them as variables. 
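In practice this means the common code stops hard-coding A2XX register names and goes through the per-GPU table instead; the idle loop earlier in this patch already reads the status register that way:

	adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status,
			&rbbm_status);

so the same idle, wait and recovery paths work whether adreno_a2xx_gpudev or adreno_a3xx_gpudev is bound to the device.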
+ */ + unsigned int reg_rbbm_status; + unsigned int reg_cp_pfp_ucode_data; + unsigned int reg_cp_pfp_ucode_addr; + + /* GPU specific function hooks */ + int (*ctxt_create)(struct adreno_device *, struct adreno_context *); void (*ctxt_save)(struct adreno_device *, struct adreno_context *); void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); irqreturn_t (*irq_handler)(struct adreno_device *); void (*irq_control)(struct adreno_device *, int); + void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + void (*start)(struct adreno_device *); + unsigned int (*busy_cycles)(struct adreno_device *); }; extern struct adreno_gpudev adreno_a2xx_gpudev; +extern struct adreno_gpudev adreno_a3xx_gpudev; int adreno_idle(struct kgsl_device *device, unsigned int timeout); void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, @@ -85,23 +123,32 @@ void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); -uint8_t *kgsl_sharedmem_convertaddr(struct kgsl_device *device, - unsigned int pt_base, unsigned int gpuaddr, unsigned int *size); +struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, + unsigned int pt_base, + unsigned int gpuaddr, + unsigned int size); + +uint8_t *adreno_convertaddr(struct kgsl_device *device, + unsigned int pt_base, unsigned int gpuaddr, unsigned int size); static inline int adreno_is_a200(struct adreno_device *adreno_dev) { return (adreno_dev->gpurev == ADRENO_REV_A200); } +static inline int adreno_is_a203(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev == ADRENO_REV_A203); +} + static inline int adreno_is_a205(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200); + return (adreno_dev->gpurev == ADRENO_REV_A205); } static inline int adreno_is_a20x(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev == ADRENO_REV_A200 || - adreno_dev->gpurev == ADRENO_REV_A205); + return (adreno_dev->gpurev <= 209); } static inline int adreno_is_a220(struct adreno_device *adreno_dev) @@ -122,7 +169,36 @@ static inline int adreno_is_a22x(struct adreno_device *adreno_dev) static inline int adreno_is_a2xx(struct adreno_device *adreno_dev) { - return (adreno_dev->gpurev <= ADRENO_REV_A225); + return (adreno_dev->gpurev <= 299); +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return (adreno_dev->gpurev >= 300); +} + +/** + * adreno_encode_istore_size - encode istore size in CP format + * @adreno_dev - The 3D device. + * + * Encode the istore size into the format expected that the + * CP_SET_SHADER_BASES and CP_ME_INIT commands: + * bits 31:29 - istore size as encoded by this function + * bits 27:16 - vertex shader start offset in instructions + * bits 11:0 - pixel shader start offset in instructions. + */ +static inline int adreno_encode_istore_size(struct adreno_device *adreno_dev) +{ + unsigned int size; + /* in a225 the CP microcode multiplies the encoded + * value by 3 while decoding. + */ + if (adreno_is_a225(adreno_dev)) + size = adreno_dev->istore_size/3; + else + size = adreno_dev->istore_size; + + return (ilog2(size) - 5) << 29; } diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index 064b05e9..dc43062e 100644 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. 
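A worked example for adreno_encode_istore_size() above: the old A2XX code assumed a fixed 512-instruction store (SHADER_INSTRUCT_LOG2 = 9) and so the gmem save/restore builders hard-coded (9 - 5) << 29 = 0x80000000 together with a pixel shader start of 0x180 in their CP_SET_SHADER_BASES packets. With this change the same value falls out of the probed istore size:

	/* e.g. a core whose istore holds 512 instructions */
	adreno_dev->istore_size = 512;
	adreno_encode_istore_size(adreno_dev); /* (ilog2(512) - 5) << 29 == 4 << 29 == 0x80000000 */

while an A225, whose CP microcode multiplies the encoded field by 3 during decode, divides istore_size by 3 before taking the log.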
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,6 +11,8 @@ * */ +#include + #include "kgsl.h" #include "kgsl_sharedmem.h" #include "kgsl_cffdump.h" @@ -72,10 +74,6 @@ #define TEX_CONSTANTS (32*6) /* DWORDS */ #define BOOL_CONSTANTS 8 /* DWORDS */ #define LOOP_CONSTANTS 56 /* DWORDS */ -#define SHADER_INSTRUCT_LOG2 9U /* 2^n == SHADER_INSTRUCTIONS */ - -/* 96-bit instructions */ -#define SHADER_INSTRUCT (1<istore_size * + (adreno_dev->instruction_size * sizeof(unsigned int)); +} + +static inline int _context_size(struct adreno_device *adreno_dev) +{ + return SHADER_OFFSET + 3*_shader_shadow_size(adreno_dev); +} /* A scratchpad used to build commands during context create */ @@ -546,6 +552,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, unsigned int addr = shadow->gmemshadow.gpuaddr; unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -554,6 +561,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -601,7 +609,8 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00003F00; *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* load the patched vertex shader stream */ cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); @@ -755,6 +764,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, unsigned int *cmds = shadow->gmem_restore_commands; unsigned int *start = cmds; + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { /* Store TP0_CHICKEN register */ *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; @@ -762,6 +772,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; + } /* Set TP0_CHICKEN to zero */ *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); @@ -802,7 +813,8 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); - *cmds++ = (0x80000000) | 0x180; + *cmds++ = adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start; /* Load the patched fragment shader stream */ cmds = @@ -1089,7 +1101,8 @@ static void build_regrestore_cmds(struct adreno_device *adreno_dev, } static void -build_shader_save_restore_cmds(struct adreno_context *drawctxt) +build_shader_save_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) { unsigned int *cmd = tmp_ctx.cmd; unsigned int *save, *restore, *fixup; @@ -1099,8 +1112,10 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) /* compute vertex, pixel and shared instruction shadow GPU addresses */ tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; - tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE; - tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE; + tmp_ctx.shader_pixel = 
tmp_ctx.shader_vertex + + _shader_shadow_size(adreno_dev); + tmp_ctx.shader_shared = tmp_ctx.shader_pixel + + _shader_shadow_size(adreno_dev); /* restore shader partitioning and instructions */ @@ -1156,8 +1171,8 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) *cmd++ = REG_SCRATCH_REG2; /* AND off invalid bits. */ *cmd++ = 0x0FFF0FFF; - /* OR in instruction memory size */ - *cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29); + /* OR in instruction memory size. */ + *cmd++ = adreno_encode_istore_size(adreno_dev); /* write the computed value to the SET_SHADER_BASES data field */ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); @@ -1219,45 +1234,22 @@ build_shader_save_restore_cmds(struct adreno_context *drawctxt) } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { - int result; - - /* Allocate vmalloc memory to store the gpustate */ - result = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); - - if (result) - return result; - drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; - /* Blank out h/w register, constant, and command buffer shadows. */ - kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); - - /* set-up command and vertex buffer pointers */ - tmp_ctx.cmd = tmp_ctx.start - = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); - /* build indirect command buffers to save & restore regs/constants */ - adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); build_regrestore_cmds(adreno_dev, drawctxt); build_regsave_cmds(adreno_dev, drawctxt); - build_shader_save_restore_cmds(drawctxt); + build_shader_save_restore_cmds(adreno_dev, drawctxt); - kgsl_cache_range_op(&drawctxt->gpustate, - KGSL_CACHE_OP_FLUSH); - - kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, - drawctxt->gpustate.gpuaddr, - drawctxt->gpustate.size, false); return 0; } /* create buffers for saving/restoring registers, constants, & GMEM */ -static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, +static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { int result; @@ -1272,8 +1264,8 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, if (result) return result; - /* we've allocated the shadow, when swapped out, GMEM must be saved. */ - drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE; + /* set the gmem shadow flag for the context */ + drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW; /* blank out gmem shadow. */ kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, @@ -1284,6 +1276,7 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, &tmp_ctx.cmd); /* build TP0_CHICKEN register restore command buffer */ + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt); /* build indirect command buffers to save & restore gmem */ @@ -1309,7 +1302,61 @@ static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev, return 0; } -static void a2xx_ctxt_save(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more then just storage for + * the gpustate. 
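The allocation described here is sized by _context_size() from earlier in this file: the register/constant shadows and command buffer up to SHADER_OFFSET, followed by three instruction-store shadows whose size now tracks the probed istore instead of a fixed constant, roughly:

	/* sketch of the per-context gpustate buffer on A2XX */
	size = SHADER_OFFSET                        /* regs, constants, C&V commands   */
	     + 3 * _shader_shadow_size(adreno_dev); /* vertex + pixel + shared shadows */

which matches how build_shader_save_restore_cmds() above places shader_vertex, shader_pixel and shader_shared back to back.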
This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, _context_size(adreno_dev)); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, + _context_size(adreno_dev)); + + tmp_ctx.cmd = tmp_ctx.start + = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) { + ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + } + + /* Flush and sync the gpustate memory */ + + kgsl_cache_range_op(&drawctxt->gpustate, + KGSL_CACHE_OP_FLUSH); + + kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, + drawctxt->gpustate.gpuaddr, + drawctxt->gpustate.size, false); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a2xx_drawctxt_save(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1321,25 +1368,28 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, KGSL_CTXT_WARN(device, "Current active context has caused gpu hang\n"); - KGSL_CTXT_INFO(device, - "active context flags %08x\n", context->flags); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_save, 3); if (context->flags & CTXT_FLAGS_SHADER_SAVE) { /* save shader partitioning and instructions. */ - adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); - /* fixup shader partitioning parameter for + /* + * fixup shader partitioning parameter for * SET_SHADER_BASES. 
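For reference, a compact summary of what a2xx_drawctxt_create() above decides from the context flags (summary only, no new behaviour):

	/* !CTXT_FLAGS_PREAMBLE    -> build register/shader shadows and set
	 *                            CTXT_FLAGS_SHADER_SAVE
	 * !CTXT_FLAGS_NOGMEMALLOC -> build the GMEM shadow
	 */

Preamble contexts skip the register save/restore and chicken-restore commands in the save and restore paths below, since they are expected to re-establish their own state from a preamble IB.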
*/ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->shader_fixup, 3); context->flags |= CTXT_FLAGS_SHADER_RESTORE; } + } if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { @@ -1350,14 +1400,16 @@ static void a2xx_ctxt_save(struct adreno_device *adreno_dev, context->context_gmem_shadow.gmem_save, 3); /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags |= CTXT_FLAGS_GMEM_RESTORE; } } -static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, +static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1375,9 +1427,9 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context); - cmds[4] = (unsigned int) context; - adreno_ringbuffer_issuecmds(device, 0, cmds, 5); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(device, context->pagetable); #ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP @@ -1393,27 +1445,34 @@ static void a2xx_ctxt_restore(struct adreno_device *adreno_dev, adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); /* restore shader instructions & partitioning. 
*/ if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, 0, + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); } + } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; - adreno_ringbuffer_issuecmds(device, 0, cmds, 2); + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + cmds, 2); } } @@ -1492,11 +1551,18 @@ static void a2xx_cp_intrcallback(struct kgsl_device *device) if (status & CP_INT_CNTL__RB_INT_MASK) { /* signal intr completion event */ - unsigned int enableflag = 0; + unsigned int context_id; + kgsl_sharedmem_readl(&device->memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { kgsl_sharedmem_writel(&rb->device->memstore, - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), - enableflag); + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); + device->last_expired_ctxt_id = context_id; wmb(); + } KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); } @@ -1532,21 +1598,33 @@ static void a2xx_rbbm_intrcallback(struct kgsl_device *device) { unsigned int status = 0; unsigned int rderr = 0; + unsigned int addr = 0; + const char *source; adreno_regread(device, REG_RBBM_INT_STATUS, &status); if (status & RBBM_INT_CNTL__RDERR_INT_MASK) { - union rbbm_read_error_u rerr; adreno_regread(device, REG_RBBM_READ_ERROR, &rderr); - rerr.val = rderr; - if (rerr.f.read_address == REG_CP_INT_STATUS && - rerr.f.read_error && - rerr.f.read_requester) + source = (rderr & RBBM_READ_ERROR_REQUESTER) + ? "host" : "cp"; + /* convert to dword address */ + addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2; + + /* + * Log CP_INT_STATUS interrupts from the CP at a + * lower level because they can happen frequently + * and are worked around in a2xx_irq_handler. 
+ */ + if (addr == REG_CP_INT_STATUS && + rderr & RBBM_READ_ERROR_ERROR && + rderr & RBBM_READ_ERROR_REQUESTER) KGSL_DRV_WARN(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); else KGSL_DRV_CRIT(device, - "rbbm read error interrupt: %08x\n", rderr); + "rbbm read error interrupt: %s reg: %04X\n", + source, addr); } status &= RBBM_INT_MASK; @@ -1597,11 +1675,195 @@ static void a2xx_irq_control(struct adreno_device *adreno_dev, int state) wmb(); } +static void a2xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + + /* ME_INIT */ + cmds = adreno_ringbuffer_allocspace(rb, 19); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18)); + /* All fields present (bits 9:0) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); + /* Disable/Enable Real-Time Stream processing (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); + GSL_RB_WRITE(cmds, cmds_gpu, + SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); + + /* Instruction memory size: */ + GSL_RB_WRITE(cmds, cmds_gpu, + (adreno_encode_istore_size(adreno_dev) + | adreno_dev->pix_shader_start)); + /* Maximum Contexts */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + /* Write Confirm Interval and The CP will wait the + * wait_interval * 16 clocks between polling */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + /* NQ and External Memory Swap */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode error checking */ + GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); + /* Disable header dumping and Header dump address */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Header dump size */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_FREEZE); + + /* Get the value */ + adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &val); + + /* Reset the counter */ + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_RESET); + + /* Re-Enable the performance monitors */ + adreno_regread(device, REG_RBBM_PM_OVERRIDE2, ®); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40)); + adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1); + adreno_regwrite(device, REG_CP_PERFMON_CNTL, + REG_PERF_MODE_CNT | REG_PERF_STATE_ENABLE); + + return val; +} + +static void a2xx_gmeminit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + union reg_rb_edram_info rb_edram_info; + unsigned int gmem_size; + unsigned int edram_value = 0; + + /* make sure edram 
range is aligned to size */ + BUG_ON(adreno_dev->gmemspace.gpu_base & + (adreno_dev->gmemspace.sizebytes - 1)); + + /* get edram_size value equivalent */ + gmem_size = (adreno_dev->gmemspace.sizebytes >> 14); + while (gmem_size >>= 1) + edram_value++; + + rb_edram_info.val = 0; + + rb_edram_info.f.edram_size = edram_value; + rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */ + + /* must be aligned to size */ + rb_edram_info.f.edram_range = (adreno_dev->gmemspace.gpu_base >> 14); + + adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val); +} + +static void a2xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* + * We need to make sure all blocks are powered up and clocked + * before issuing a soft reset. The overrides will then be + * turned off (set to 0) + */ + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe); + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff); + + /* + * Only reset CP block if all blocks have previously been + * reset + */ + if (!(device->flags & KGSL_FLAGS_SOFT_RESET) || + !adreno_is_a22x(adreno_dev)) { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0xFFFFFFFF); + device->flags |= KGSL_FLAGS_SOFT_RESET; + } else { + adreno_regwrite(device, REG_RBBM_SOFT_RESET, + 0x00000001); + } + /* + * The core is in an indeterminate state until the reset + * completes after 30ms. + */ + msleep(30); + + adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000); + + if (adreno_is_a225(adreno_dev)) { + /* Enable large instruction store for A225 */ + adreno_regwrite(device, REG_SQ_FLOW_CONTROL, + 0x18000000); + } + + adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442); + + adreno_regwrite(device, REG_SQ_VS_PROGRAM, 0x00000000); + adreno_regwrite(device, REG_SQ_PS_PROGRAM, 0x00000000); + +// if (cpu_is_msm8960() || cpu_is_msm8930()) + if(0) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0x200); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0); + + if (!adreno_is_a22x(adreno_dev)) + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0); + else + adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80); + + adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000); + + /* Make sure interrupts are disabled */ + adreno_regwrite(device, REG_RBBM_INT_CNTL, 0); + adreno_regwrite(device, REG_CP_INT_CNTL, 0); + adreno_regwrite(device, REG_SQ_INT_CNTL, 0); + + a2xx_gmeminit(adreno_dev); +} + +/* Defined in adreno_a2xx_snapshot.c */ +void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, + int *remain, int hang); + struct adreno_gpudev adreno_a2xx_gpudev = { - .ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow, - .ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow, - .ctxt_save = a2xx_ctxt_save, - .ctxt_restore = a2xx_ctxt_restore, + .reg_rbbm_status = REG_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = REG_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = REG_CP_PFP_UCODE_DATA, + + .ctxt_create = a2xx_drawctxt_create, + .ctxt_save = a2xx_drawctxt_save, + .ctxt_restore = a2xx_drawctxt_restore, .irq_handler = a2xx_irq_handler, .irq_control = a2xx_irq_control, + .rb_init = a2xx_rb_init, + .busy_cycles = a2xx_busy_cycles, + .start = a2xx_start, }; diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100755 index 00000000..507ad02e --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,2555 @@ +/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. 
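Stepping back to a2xx_gmeminit() above, the EDRAM (GMEM) size is programmed as a power-of-two count of 16 KB blocks and the range field takes the base in the same units, which is why the BUG_ON() insists the base is size-aligned. A worked example:

	/* e.g. a 256 KB GMEM aperture */
	gmem_size = SZ_256K >> 14;	/* 16 blocks of 16 KB */
	/* the while loop turns that into edram_size = 4, i.e. log2(16) */
	rb_edram_info.f.edram_range = adreno_dev->gmemspace.gpu_base >> 14;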
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" + +/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem + * functions. + */ + +#define _SET(_shift, _val) ((_val) << (_shift)) + +/* + **************************************************************************** + * + * Context state shadow structure: + * + * +---------------------+------------+-------------+---------------------+---+ + * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex| + * +---------------------+------------+-------------+---------------------+---+ + * + * 8K - ALU Constant Shadow (8K aligned) + * 4K - H/W Register Shadow (8K aligned) + * 5K - Command and Vertex Buffers + * 8K - Shader Instruction Shadow + * ~6K - Texture Constant Shadow + * + * + *************************************************************************** + */ + +/* Sizes of all sections in state shadow memory */ +#define ALU_SHADOW_SIZE (8*1024) /* 8KB */ +#define REG_SHADOW_SIZE (4*1024) /* 4KB */ +#define CMD_BUFFER_SIZE (5*1024) /* 5KB */ +#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */ +#define TEX_SIZE_MIPMAP 1936 /* bytes */ +#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */ +#define TEX_SHADOW_SIZE \ + ((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \ + TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */ +#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */ + +/* Total context size, excluding GMEM shadow */ +#define CONTEXT_SIZE \ + (ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \ + CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \ + TEX_SHADOW_SIZE) + +/* Offsets to different sections in context shadow memory */ +#define REG_OFFSET ALU_SHADOW_SIZE +#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE) +#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE) +#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE) +#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET +#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) +#define FS_TEX_OFFSET_MEM_OBJECTS \ + (VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ) +#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS) +#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP) + +/* The offset for fragment shader data in HLSQ context */ +#define SSIZE (16*1024) + +#define HLSQ_SAMPLER_OFFSET 0x000 +#define HLSQ_MEMOBJ_OFFSET 0x400 +#define HLSQ_MIPMAP_OFFSET 0x800 + +#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM +/* Use shadow RAM */ +#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2) +#else +/* Use working RAM */ +#define HLSQ_SHADOW_BASE 0x10000 +#endif + +#define REG_TO_MEM_LOOP_COUNT_SHIFT 15 + +#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \ + vis_cull_mode) \ + (((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \ + ((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \ + ((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \ + ((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \ + ((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) 
| \ + (1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE)) + +/* + * List of context registers (starting from dword offset 0x2000). + * Each line contains start and end of a range of registers. + */ +static const unsigned int context_register_ranges[] = { + A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL, + A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ, + A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE, + A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE, + A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET, + A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL, + A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL, + A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR, + A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR, + A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3, + A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO, + A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL, + A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL, + A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, + A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX, + A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, + A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG, + A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG, + A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, + A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG, + A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD, + A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG, + A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7, + A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG, + A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG, + A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, + A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG, + A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3, + A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG, + A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, + A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1, +}; + +/* Global registers that need to be saved separately */ +static const unsigned int global_registers[] = { + A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0, + A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0, + A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1, + A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1, + A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2, + A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2, + A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3, + A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3, + A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4, + A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4, + A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5, + A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5, + A3XX_VSC_BIN_SIZE, + A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1, + A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3, + A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5, + A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7, + A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_SIZE_ADDRESS +}; + +#define 
GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers) + +/* A scratchpad used to build commands during context create */ +static struct tmp_ctx { + unsigned int *cmd; /* Next available dword in C&V buffer */ + + /* Addresses in comamnd buffer where registers are saved */ + uint32_t reg_values[GLOBAL_REGISTER_COUNT]; + uint32_t gmem_base; /* Base GPU address of GMEM */ +} tmp_ctx; + +#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC +/* + * Function for executing dest = ( (reg & and) ROL rol ) | or + */ +static unsigned int *rmw_regtomem(unsigned int *cmd, + unsigned int reg, unsigned int and, + unsigned int rol, unsigned int or, + unsigned int dest) +{ + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = reg; /* OR address */ + + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2; + *cmd++ = and; /* AND value */ + *cmd++ = or; /* OR value */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = dest; + + return cmd; +} +#endif + +static void build_regconstantsave_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int i; + + drawctxt->constant_save_commands[0].hostptr = cmd; + drawctxt->constant_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + cmd++; + + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* + * Context registers are already shadowed; just need to + * disable shadowing to prevent corruption. + */ + + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; /* regs, start=0 */ + *cmd++ = 0x0; /* count = 0 */ + +#else + /* + * Make sure the HW context has the correct register values before + * reading them. + */ + + /* Write context registers into shadow */ + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + unsigned int start = context_register_ranges[i * 2]; + unsigned int end = context_register_ranges[i * 2 + 1]; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + start; + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) + & 0xFFFFE000) + (start - 0x2000) * 4; + } +#endif + + /* Need to handle some of the global registers separately */ + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = global_registers[i]; + *cmd++ = tmp_ctx.reg_values[i]; + } + + /* Save vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[1].hostptr = cmd; + drawctxt->constant_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000) / 4 + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
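All of the save packets in this function share the same CP_REG_TO_MEM encoding: the first data word packs the dword count above REG_TO_MEM_LOOP_COUNT_SHIFT (bit 15) with the source register or HLSQ shadow offset in the low bits, and the second word is the destination GPU address. For the constant saves that count depends on the shaders currently bound, so the word is written as 0 here and patched in by the context-switch fixup; for the fixed-size texture shadows further down it is computed directly, for example:

	/* VS texture memory objects: 896 bytes = 224 dwords out of the HLSQ shadow */
	*cmd++ = ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);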
+ */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + /* ALU constant shadow base */ + *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc; + + /* Save fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->constant_save_commands[2].hostptr = cmd; + drawctxt->constant_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4 + src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4 + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + */ + *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + + /* + From fixup: + + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + dst = base + offset + Because of the base alignment we can use + dst = base | offset + */ + *cmd++ = 0; /* dst */ + + /* Save VS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save VS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save VS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Save FS texture memory objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc; + + /* Save FS texture mipmap pointers */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc; + + /* Save FS texture sampler objects */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = + ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) | + ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4); + *cmd++ = + (drawctxt->gpustate.gpuaddr + + FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* Copy GMEM contents to system memory shadow. 
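The A3XX copy works the same way as the A2XX one: the RB is put into a resolve pass (RB_MODECONTROL_RENDER_MODE set to RB_RESOLVE_PASS), RB_COPY_CONTROL/DEST are pointed at GMEM and at the shadow buffer, and a screen-aligned quad is drawn from shadow->quad_vertices so the whole of GMEM is streamed out. The shifts used below imply the units (a sketch with illustrative names, assuming RGBA8 at 4 bytes per pixel):

	copy_gmem_base  = tmp_ctx.gmem_base >> 14;		/* GMEM base in 16 KB units */
	copy_dest_base  = shadow->gmemshadow.gpuaddr >> 5;	/* destination, 32-byte aligned */
	copy_dest_pitch = (shadow->pitch * 4) / 32;		/* row pitch in 32-byte units */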
*/ +static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) | + _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL); + /* RB_COPY_CONTROL */ + *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE, + RB_CLEAR_MODE_RESOLVE) | + _SET(RB_COPYCONTROL_COPY_GMEM_BASE, + tmp_ctx.gmem_base >> 14); + /* RB_COPY_DEST_BASE */ + *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE, + shadow->gmemshadow.gpuaddr >> 5); + /* RB_COPY_DEST_PITCH */ + *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH, + (shadow->pitch * 4) / 32); + /* RB_COPY_DEST_INFO */ + *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE, + RB_TILINGMODE_LINEAR) | + _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) | + _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices.gpuaddr); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_RESERVED2, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) | + _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) | + _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) | + _SET(HLSQ_CTRL1REG_RESERVED1, 4); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 
31); + /* HLSQ_CONTROL_3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REQ */ + *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) | + _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) | + _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252); + /* SP_VS_OUT_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG_7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG_0 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG_3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 1); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) | + _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG_1 */ + *cmds++ = 
0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUTPUT_REG */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG_0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 1); + /* SP_FS_MRT_REG_1 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_2 */ + *cmds++ = 0x00000000; + /* SP_FS_MRT_REG_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_INTERUPT_MODE_3 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_PS_REPL_MODE_3 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */ + *cmds++ = 0x00000005; *cmds++ = 0x30044b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + + /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30244b01; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLEND_CONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + 
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLEND_CONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLEND_CONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLEND_CONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INSTANCEID_OFFSET */ + *cmds++ = 0x00000000; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CNTL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* 
GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) | + _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) | + _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ); + /* GRAS_CL_GB_CLIP_ADJ */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Resolve using two draw calls with a dummy register + * write in between. This is a HLM workaround + * that should be removed later. 
+ */ + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000000; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000002; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST, + PC_DI_SRC_SEL_IMMEDIATE, + PC_DI_INDEX_SIZE_32_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000003; /* Num indices */ + *cmds++ = 0x00000002; /* Index 0 */ + *cmds++ = 0x00000001; /* Index 1 */ + *cmds++ = 0x00000003; /* Index 2 */ + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG); + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_save, start, cmds); + + return cmds; +} + +static void build_shader_save_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start; + + /* Reserve space for boolean values used for COND_EXEC packet */ + drawctxt->cond_execs[0].hostptr = cmd; + drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[1].hostptr = cmd; + drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + drawctxt->shader_save_commands[0].hostptr = cmd; + drawctxt->shader_save_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->shader_save_commands[1].hostptr = cmd; + drawctxt->shader_save_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + start = cmd; + + /* Save vertex shader */ + + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[2].hostptr = cmd; + drawctxt->shader_save_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8 + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + src = (HLSQ_SHADOW_BASE + 0x1000)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Save fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 0x0000FFFF; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + drawctxt->shader_save_commands[3].hostptr = cmd; + drawctxt->shader_save_commands[3].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8 + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. 
+ + fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32 + From regspec: + + SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]: + First instruction of the whole shader will be stored from + the offset in instruction cache, unit = 256bits, a cache line. + It can start from 0 if no VS available. + + src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4 + */ + *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->shader_save, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* + * Make an IB to modify context save IBs with the correct shader instruction + * and constant sizes and offsets. + */ + +static void build_save_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Flush HLSQ lazy updates */ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + /* Make sure registers are flushed */ + *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); + *cmd++ = 0; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[2].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Save shader offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->shader_save_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[1].gpuaddr; + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_save_commands[2].gpuaddr; + + /* Save FS constant offset */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_save_commands[0].gpuaddr; + + + /* Save VS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[0].gpuaddr; + + /* Save FS instruction store mode */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->cond_execs[1].gpuaddr; +#else + + /* Shader save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 11+REG_TO_MEM_LOOP_COUNT_SHIFT, + (HLSQ_SHADOW_BASE + 0x1000) / 4, + drawctxt->shader_save_commands[2].gpuaddr); + + /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */ + /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 ) + | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) | + 
A3XX_CP_SCRATCH_REG2; + *cmd++ = 0x7f000000; /* AND value */ + *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */ + + /* + * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) | + * SP_FS_OBJ_OFFSET_REG + */ + + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3; + *cmd++ = 0x00000000; /* AND value */ + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */ + /* + * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) | + * 0x00000000 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = A3XX_CP_SCRATCH_REG3; + *cmd++ = 0xfe000000; /* AND value */ + *cmd++ = 0x00000000; /* OR value */ + /* + * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3 + */ + *cmd++ = cp_type3_packet(CP_REG_RMW, 3); + *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; + *cmd++ = 0xffffffff; /* AND value */ + *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */ + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_CP_SCRATCH_REG2; + *cmd++ = drawctxt->shader_save_commands[3].gpuaddr; + + /* Constant save */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000) / 4, + drawctxt->constant_save_commands[1].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4, + drawctxt->constant_save_commands[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, drawctxt->gpustate.gpuaddr & 0xfffffe00, + drawctxt->constant_save_commands[2].gpuaddr + + sizeof(unsigned int)); + + /* Modify constant save conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save VS instruction store mode */ + + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[0].gpuaddr); + + /* Save FS instruction store mode */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002, + 31, 0, drawctxt->cond_execs[1].gpuaddr); + +#endif + + create_ib1(drawctxt, drawctxt->save_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/****************************************************************************/ +/* Functions to build context restore IBs */ +/****************************************************************************/ + +static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, + struct gmem_shadow_t *shadow) +{ + unsigned int *cmds = tmp_ctx.cmd; + unsigned int *start = cmds; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + /* HLSQ_CONTROL_0_REG */ + *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) | + _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) | + _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) | + _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) | + _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1); + /* HLSQ_CONTROL_1_REG */ + *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS); + /* HLSQ_CONTROL_2_REG */ + *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31); + /* HLSQ_CONTROL3_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0); + /* RB_MRT_BUF_INFO0 */ + *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) | + _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) | + _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH, + (shadow->gmem_pitch * 4 * 8) / 256); + /* RB_MRT_BUF_BASE0 */ + *cmds++ 
= _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5); + + /* Texture samplers */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00000240; + *cmds++ = 0x00000000; + + /* Texture memobjs */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6); + *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x4cc06880; + *cmds++ = shadow->height | (shadow->width << 14); + *cmds++ = (shadow->pitch*4*8) << 9; + *cmds++ = 0x00000000; + + /* Mipmap bases */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16); + *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = shadow->gmemshadow.gpuaddr; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG); + /* HLSQ_VS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1); + /* HLSQ_FS_CONTROL_REG */ + *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) | + _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) | + _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2); + /* HLSQ_CONST_VSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + /* HLSQ_CONST_FSPRESV_RANGE_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG); + /* SP_FS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12); + *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0); + /* SP_VS_CTRL_REG0 */ + *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) | + _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) | + _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) | + _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) | + _SET(SP_VSCTRLREG0_VSLENGTH, 1); + /* SP_VS_CTRL_REG1 */ + *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8); + /* SP_VS_PARAM_REG */ + *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) | + _SET(SP_VSPARAMREG_PSIZEREGID, 252) | + _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1); + /* SP_VS_OUT_REG0 */ + *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3); + /* SP_VS_OUT_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG4 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG5 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG6 */ + *cmds++ = 0x00000000; + /* SP_VS_OUT_REG7 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); + *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0); + /* SP_VS_VPC_DST_REG0 */ + *cmds++ 
= _SET(SP_VSVPCDSTREG_OUTLOC0, 8); + /* SP_VS_VPC_DST_REG1 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG2 */ + *cmds++ = 0x00000000; + /* SP_VS_VPC_DST_REG3 */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_OFFSET_REG */ + *cmds++ = 0x00000000; + /* SP_VS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); + *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG); + /* SP_VS_LENGTH_REG */ + *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1); + /* SP_FS_CTRL_REG0 */ + *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) | + _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) | + _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) | + _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) | + _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) | + _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) | + _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) | + _SET(SP_FSCTRLREG0_FSLENGTH, 2); + /* SP_FS_CTRL_REG1 */ + *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) | + _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) | + _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63); + /* SP_FS_OBJ_OFFSET_REG */ + *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128); + /* SP_FS_OBJ_START_REG */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0); + /* SP_FS_FLAT_SHAD_MODE_REG0 */ + *cmds++ = 0x00000000; + /* SP_FS_FLAT_SHAD_MODE_REG1 */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG); + /* SP_FS_OUT_REG */ + *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0); + /* SP_FS_MRT_REG0 */ + *cmds++ = _SET(SP_FSMRTREG_REGID, 4); + /* SP_FS_MRT_REG1 */ + *cmds++ = 0; + /* SP_FS_MRT_REG2 */ + *cmds++ = 0; + /* SP_FS_MRT_REG3 */ + *cmds++ = 0; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_VPC_ATTR); + /* VPC_ATTR */ + *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) | + _SET(VPC_VPCATTR_THRHDASSIGN, 1) | + _SET(VPC_VPCATTR_LMSIZE, 1); + /* VPC_PACK */ + *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) | + _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2); + /* VPC_VARYING_INTERP_MODE_0 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE1 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_INTERP_MODE2 */ + *cmds++ = 0x00000000; + /* VPC_VARYING_IINTERP_MODE3 */ + *cmds++ = 0x00000000; + /* VPC_VARRYING_PS_REPL_MODE_0 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_1 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + 
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_2 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + /* VPC_VARRYING_PS_REPL_MODE_3 */ + *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) | + _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11); + *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG); + /* SP_SP_CTRL_REG */ + *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1); + + /* Load vertex shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)end; */ + *cmds++ = 0x00000000; *cmds++ = 0x13000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + /* nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000000; + + /* Load fragment shader */ + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18); + *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT) + | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT) + | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT) + | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT) + | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */ + *cmds++ = 0x00002000; *cmds++ = 0x57368902; + /* (rpt5)nop; */ + *cmds++ = 0x00000000; *cmds++ = 0x00000500; + /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */ + *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00; + /* (sy)mov.f32f32 r1.x, r0.x; */ + *cmds++ = 0x00000000; *cmds++ = 0x30044004; + /* mov.f32f32 r1.y, r0.y; */ + *cmds++ = 0x00000001; *cmds++ = 0x20044005; + /* 
mov.f32f32 r1.z, r0.z; */ + *cmds++ = 0x00000002; *cmds++ = 0x20044006; + /* mov.f32f32 r1.w, r0.w; */ + *cmds++ = 0x00000003; *cmds++ = 0x20044007; + /* end; */ + *cmds++ = 0x00000000; *cmds++ = 0x03000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_CONTROL_0); + /* VFD_CONTROL_0 */ + *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) | + _SET(VFD_CTRLREG0_PACKETSIZE, 2) | + _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) | + _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2); + /* VFD_CONTROL_1 */ + *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) | + _SET(VFD_CTRLREG1_REGID4VTX, 252) | + _SET(VFD_CTRLREG1_REGID4INST, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0); + /* VFD_FETCH_INSTR_0_0 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) | + _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_0 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr); + /* VFD_FETCH_INSTR_0_1 */ + *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) | + _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) | + _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) | + _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1); + /* VFD_FETCH_INSTR_1_1 */ + *cmds++ = _SET(VFD_BASEADDR_BASEADDR, + shadow->quad_vertices_restore.gpuaddr + 16); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0); + /* VFD_DECODE_INSTR_0 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) | + _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1); + /* VFD_DECODE_INSTR_1 */ + *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) | + _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) | + _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) | + _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) | + _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) | + _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL); + /* RB_DEPTH_CONTROL */ + *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL); + /* RB_STENCIL_CONTROL */ + *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) | + _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) | + _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); + /* RB_MODE_CONTROL */ + *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) | + _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL); + /* RB_RENDER_CONTROL */ + *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) | + _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL); + /* RB_MSAA_CONTROL */ + *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) | + 
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0); + /* RB_MRT_CONTROL0 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_ROP_CODE, 12) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0); + /* RB_MRT_BLENDCONTROL0 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL1 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1); + /* RB_MRT_BLENDCONTROL1 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL2 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2); + /* RB_MRT_BLENDCONTROL2 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + /* RB_MRT_CONTROL3 */ + *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) | + _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) | + _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3); + /* RB_MRT_BLENDCONTROL3 */ + *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) | + _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) | + _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) | + _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN); + /* VFD_INDEX_MIN */ + *cmds++ = 0x00000000; + /* VFD_INDEX_MAX */ + *cmds++ = 0xFFFFFFFF; + /* VFD_INDEX_OFFSET */ + *cmds++ = 0x00000000; + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD); + /* VFD_VS_THREADING_THRESHOLD */ + *cmds++ = 
_SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) | + _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET); + /* TPL1_TP_VS_TEX_OFFSET */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET); + /* TPL1_TP_FS_TEX_OFFSET */ + *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) | + _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL); + /* GRAS_SC_CONTROL */ + *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL); + /* GRAS_SU_MODE_CONTROL */ + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL); + /* GRAS_SC_WINDOW_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_WINDOW_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL); + /* GRAS_SC_SCREEN_SCISSOR_TL */ + *cmds++ = 0x00000000; + /* GRAS_SC_SCREEN_SCISSOR_BR */ + *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) | + _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET); + /* GRAS_CL_VPORT_XOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_XSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000); + /* GRAS_CL_VPORT_YOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_YSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); + *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET); + /* GRAS_CL_VPORT_ZOFFSET */ + *cmds++ = 0x00000000; + /* GRAS_CL_VPORT_ZSCALE */ + *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL); + /* GRAS_CL_CLIP_CNTL */ + *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0); + /* SP_FS_IMAGE_OUTPUT_REG_0 */ + *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM); + + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL); + /* PC_PRIM_VTX_CONTROL */ + *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE, + PC_DRAW_TRIANGLES) | + _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1); + + *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); + *cmds++ = 0x00000000; /* Viz query info */ + *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST, + PC_DI_SRC_SEL_AUTO_INDEX, + PC_DI_INDEX_SIZE_16_BIT, + PC_DI_IGNORE_VISIBILITY); + *cmds++ = 0x00000002; /* Num indices */ + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, shadow->gmem_restore, start, cmds); + + return cmds; +} + +static void build_regrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *start = tmp_ctx.cmd; + unsigned int *cmd = start; + unsigned int *lcc_start; + + int i; + + /* Flush HLSQ lazy updates 
*/ + *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1); + *cmd++ = 0x7; /* HLSQ_FLUSH */ + *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmd++ = 0; + + *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmd++ = 0x00000000; /* No start addr for full invalidate */ + *cmd++ = (unsigned int) + UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION | + UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE | + 0; /* No end addr for full invalidate */ + + lcc_start = cmd; + + /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ + cmd++; + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Force mismatch */ + *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; +#else + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; +#endif + + for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) { + cmd = reg_range(cmd, context_register_ranges[i * 2], + context_register_ranges[i * 2 + 1]); + } + + lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, + (cmd - lcc_start) - 1); + +#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */ +#else + lcc_start[2] |= (1 << 24) | (4 << 16); +#endif + + for (i = 0; i < ARRAY_SIZE(global_registers); i++) { + *cmd++ = cp_type0_packet(global_registers[i], 1); + tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0x00000000; + } + + create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_constantrestore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + unsigned int mode = 4; /* Indirect mode */ + unsigned int stateblock; + unsigned int numunits; + unsigned int statetype; + + drawctxt->cond_execs[2].hostptr = cmd; + drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + drawctxt->cond_execs[3].hostptr = cmd; + drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = 4 << 16; + *cmd++ = 0x0; +#endif + /* HLSQ full update */ + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */ + +#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES + /* Re-enable shadowing */ + *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); + *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; + *cmd++ = (4 << 16) | (1 << 24); + *cmd++ = 0x0; +#endif + + /* Load vertex shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[0].hostptr = cmd; + drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd, + &drawctxt->gpustate); + + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex constants) + numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits. 
+ + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + *cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1; + + /* Load fragment shader constants */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2; + *cmd++ = 0x0000ffff; + *cmd++ = 3; /* EXEC_COUNT */ + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->constant_load_commands[1].hostptr = cmd; + drawctxt->constant_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment constants) + numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units) + + From register spec: + SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16); + */ + + *cmd++ = 0; /* ord1 */ + drawctxt->constant_load_commands[2].hostptr = cmd; + drawctxt->constant_load_commands[2].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + base = drawctxt->gpustate.gpuaddr (ALU constant shadow base) + offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET + + From register spec: + SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object + start offset in on chip RAM, + 128bit aligned + + ord2 = base + offset | 1 + Because of the base alignment we can use + ord2 = base | offset | 1 + */ + *cmd++ = 0; /* ord2 */ + + /* Restore VS texture memory objects */ + stateblock = 0; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore VS texture mipmap addresses */ + stateblock = 1; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore VS texture sampler objects */ + stateblock = 0; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + /* Restore FS texture memory objects */ + stateblock = 2; + statetype = 1; + numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS) + & 0xfffffffc) | statetype; + + /* Restore FS texture mipmap addresses */ + stateblock = 3; + statetype = 1; + numunits = TEX_SIZE_MIPMAP / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) + & 0xfffffffc) | statetype; + + /* Restore FS texture sampler objects */ + stateblock = 2; + statetype = 0; + numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4; + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16); + *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ) + & 0xfffffffc) | statetype; + + create_ib1(drawctxt, drawctxt->constant_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void 
build_shader_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + /* Vertex shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[0].hostptr = cmd; + drawctxt->shader_load_commands[0].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 4 (Vertex shader) + numunits = SP_VS_CTRL_REG0.VS_LENGTH + + From regspec: + SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc; + + /* Fragment shader */ + *cmd++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2; + *cmd++ = 1; + *cmd++ = 3; /* EXEC_COUNT */ + + *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2); + drawctxt->shader_load_commands[1].hostptr = cmd; + drawctxt->shader_load_commands[1].gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + /* + From fixup: + + mode = 4 (indirect) + stateblock = 6 (Fragment shader) + numunits = SP_FS_CTRL_REG0.FS_LENGTH + + From regspec: + SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits. + If bit31 is 1, it means overflow + or any long shader. + + ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11) + */ + *cmd++ = 0; /*ord1 */ + *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET + + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc; + + create_ib1(drawctxt, drawctxt->shader_restore, start, cmd); + tmp_ctx.cmd = cmd; +} + +static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + + *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd; + drawctxt->hlsqcontrol_restore_commands[0].gpuaddr + = virt2gpu(cmd, &drawctxt->gpustate); + *cmd++ = 0; + + /* Create indirect buffer command for above command sequence */ + create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd); + + tmp_ctx.cmd = cmd; +} + +/* IB that modifies the shader and constant sizes and offsets in restore IBs. 
*/ +static void build_restore_fixup_cmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + unsigned int *cmd = tmp_ctx.cmd; + unsigned int *start = cmd; + +#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC + /* Save shader sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG0; + *cmd++ = drawctxt->shader_load_commands[1].gpuaddr; + + /* Save constant sizes */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_VS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[0].gpuaddr; + + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_CTRL_REG1; + *cmd++ = drawctxt->constant_load_commands[1].gpuaddr; + + /* Save constant offsets */ + *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); + *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; + *cmd++ = drawctxt->constant_load_commands[2].gpuaddr; +#else + /* Save shader sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000, + 30, (4 << 19) | (4 << 16), + drawctxt->shader_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000, + 30, (6 << 19) | (4 << 16), + drawctxt->shader_load_commands[1].gpuaddr); + + /* Save constant sizes */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 23, (4 << 19) | (4 << 16), + drawctxt->constant_load_commands[0].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 23, (6 << 19) | (4 << 16), + drawctxt->constant_load_commands[1].gpuaddr); + + /* Modify constant restore conditionals */ + cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[2].gpuaddr); + + cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff, + 0, 0, drawctxt->cond_execs[3].gpuaddr); + + /* Save fragment constant shadow offset */ + cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000, + 18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1, + drawctxt->constant_load_commands[2].gpuaddr); +#endif + + /* Use mask value to avoid flushing HLSQ which would cause the HW to + discard all the shader data */ + + cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff, + 0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr); + + create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd); + + tmp_ctx.cmd = cmd; +} + +static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW; + + build_regrestore_cmds(adreno_dev, drawctxt); + build_constantrestore_cmds(adreno_dev, drawctxt); + build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt); + build_regconstantsave_cmds(adreno_dev, drawctxt); + build_shader_save_cmds(adreno_dev, drawctxt); + build_shader_restore_cmds(adreno_dev, drawctxt); + build_restore_fixup_cmds(adreno_dev, drawctxt); + build_save_fixup_cmds(adreno_dev, drawctxt); + + return 0; +} + +/* create buffers for saving/restoring registers, constants, & GMEM */ +static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + calc_gmemsize(&drawctxt->context_gmem_shadow, + adreno_dev->gmemspace.sizebytes); + tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base; + + if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) { + int result = + kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, + drawctxt->pagetable, + drawctxt->context_gmem_shadow.size); + + if (result) + return result; + } else { + 
memset(&drawctxt->context_gmem_shadow.gmemshadow, 0, + sizeof(drawctxt->context_gmem_shadow.gmemshadow)); + + return 0; + } + + build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, + &tmp_ctx.cmd); + + /* Dow we need to idle? */ + /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */ + + tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt, + &drawctxt->context_gmem_shadow); + + kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow, + KGSL_CACHE_OP_FLUSH); + + return 0; +} + +static int a3xx_drawctxt_create(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + int ret; + + /* + * Allocate memory for the GPU state and the context commands. + * Despite the name, this is much more then just storage for + * the gpustate. This contains command space for gmem save + * and texture and vertex buffer storage too + */ + + ret = kgsl_allocate(&drawctxt->gpustate, + drawctxt->pagetable, CONTEXT_SIZE); + + if (ret) + return ret; + + kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE); + tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET; + + if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { + ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt); + if (ret) + goto done; + + drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; + } + + if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) + ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt); + +done: + if (ret) + kgsl_sharedmem_free(&drawctxt->gpustate); + + return ret; +} + +static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (context == NULL) + return; + + if (context->flags & CTXT_FLAGS_GPU_HANG) + KGSL_CTXT_WARN(device, + "Current active context has caused gpu hang\n"); + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + /* Fixup self modifying IBs for save operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->save_fixup, 3); + + /* save registers and constants. */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->regconstant_save, 3); + + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + /* Save shader instructions */ + adreno_ringbuffer_issuecmds(device, + KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + + context->flags |= CTXT_FLAGS_SHADER_RESTORE; + } + } + + if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && + (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + /* + * Save GMEM (note: changes shader. shader must + * already be saved.) + */ + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. 
+ gmem_save, 3); + context->flags |= CTXT_FLAGS_GMEM_RESTORE; + } +} + +static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, + struct adreno_context *context) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int cmds[5]; + + if (context == NULL) { + /* No context - set the default pagetable and thats it */ + kgsl_mmu_setstate(device, device->mmu.defaultpagetable); + return; + } + + KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); + + cmds[0] = cp_nop_packet(1); + cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); + cmds[3] = device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); + cmds[4] = context->id; + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5); + kgsl_mmu_setstate(device, context->pagetable); + + /* + * Restore GMEM. (note: changes shader. + * Shader must not already be restored.) + */ + + if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE, + context->context_gmem_shadow. + gmem_restore, 3); + context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; + } + + if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->reg_restore, 3); + + /* Fixup self modifying IBs for restore operations */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->restore_fixup, 3); + + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->constant_restore, 3); + + if (context->flags & CTXT_FLAGS_SHADER_RESTORE) + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->shader_restore, 3); + + /* Restore HLSQ_CONTROL_0 register */ + adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, + context->hlsqcontrol_restore, 3); + } +} + +static void a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds, cmds_gpu; + cmds = adreno_ringbuffer_allocspace(rb, 18); + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18); + + GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17)); + GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150); + GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + /* Protected mode control - turned off for A3XX */ + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); + + adreno_ringbuffer_submit(rb); +} + +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + const char *err = ""; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + unsigned int reg; + + adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0x3); + + /* Clear the error */ + adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + return; + } + case A3XX_INT_RBBM_REG_TIMEOUT: + err = "RBBM: AHB register timeout"; + break; + case A3XX_INT_RBBM_ME_MS_TIMEOUT: + err = "RBBM: ME master split timeout"; + break; + case A3XX_INT_RBBM_PFP_MS_TIMEOUT: + err = "RBBM: PFP master split timeout"; + break; + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + err = "RBBM: ATB bus oveflow"; + break; + case A3XX_INT_VFD_ERROR: + err = "VFD: Out of bounds access"; + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + err = "ringbuffer TO packet in IB interrupt"; + break; + case A3XX_INT_CP_OPCODE_ERROR: + err = "ringbuffer opcode error interrupt"; + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + err = "ringbuffer reserved bit error interrupt"; + break; + case A3XX_INT_CP_HW_FAULT: + err = "ringbuffer hardware fault"; + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + err = "ringbuffer protected mode error interrupt"; + break; + case A3XX_INT_CP_AHB_ERROR_HALT: + err = "ringbuffer AHB error interrupt"; + break; + case A3XX_INT_MISC_HANG_DETECT: + err = "MISC: GPU hang detected"; + break; + case A3XX_INT_UCHE_OOB_ACCESS: + err = "UCHE: Out of bounds access"; + break; + } + + KGSL_DRV_CRIT(device, "%s\n", err); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); +} + +static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) +{ + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; + + if (irq == A3XX_INT_CP_RB_INT) { + unsigned int context_id; + kgsl_sharedmem_readl(&adreno_dev->dev.memstore, + &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + if (context_id < KGSL_MEMSTORE_MAX) { + kgsl_sharedmem_writel(&rb->device->memstore, + KGSL_MEMSTORE_OFFSET(context_id, + ts_cmp_enable), 0); + wmb(); + } + KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n"); + } + + wake_up_interruptible_all(&rb->device->wait_queue); + + /* Schedule work to free mem and issue ibs */ + queue_work(rb->device->work_queue, &rb->device->ts_expired_ws); + + atomic_notifier_call_chain(&rb->device->ts_notifier_list, + rb->device->id, NULL); +} + +#define A3XX_IRQ_CALLBACK(_c) { .func = _c } + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_VFD_ERROR) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_MISC_HANG_DETECT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct { + void (*func)(struct adreno_device *, int); +} a3xx_irq_funcs[] = { + A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */ + A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - 
CP_T0_PACKET_IN_IB */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */ + A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_FAULT */ + A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */ + A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 24 - MISC_HANG_DETECT */ + A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + /* 26 to 31 - Unused */ +}; + +static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + irqreturn_t ret = IRQ_NONE; + unsigned int status, tmp; + int i; + + adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status); + + for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) { + if (tmp & 1) { + if (a3xx_irq_funcs[i].func != NULL) { + a3xx_irq_funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else { + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + } + } + + tmp >>= 1; + } + + if (status) + adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD, + status); + return ret; +} + +static void a3xx_irq_control(struct adreno_device *adreno_dev, int state) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (state) + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK); + else + adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0); +} + +static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg, val; + + /* Freeze the counter */ + adreno_regread(device, A3XX_RBBM_RBBM_CTL, ®); + reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Read the value */ + adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val); + + /* Reset the counter */ + reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + /* Re-enable the counter */ + reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1; + reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + + return val; +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* Reset the core */ + adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD, + 0x00000001); + msleep(20); + + /* + * enable fixed master AXI port of 0x0 for all clients to keep + * traffic from going to random places + */ + + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000); + adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Enable the RBBM error reporting bits. 
This lets us get + useful information on failure */ + + adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000); +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_rbbm_status = A3XX_RBBM_STATUS, + .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR, + .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA, + + .ctxt_create = a3xx_drawctxt_create, + .ctxt_save = a3xx_drawctxt_save, + .ctxt_restore = a3xx_drawctxt_restore, + .rb_init = a3xx_rb_init, + .irq_control = a3xx_irq_control, + .irq_handler = a3xx_irq_handler, + .busy_cycles = a3xx_busy_cycles, + .start = a3xx_start, +}; diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c old mode 100644 new mode 100755 index c878a2c2..9c9ee02c --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -223,21 +223,24 @@ static int kgsl_regread_nolock(struct kgsl_device *device, return 0; } -#define KGSL_ISTORE_START 0x5000 -#define KGSL_ISTORE_LENGTH 0x600 +#define ADRENO_ISTORE_START 0x5000 static ssize_t kgsl_istore_read( struct file *file, char __user *buff, size_t buff_count, loff_t *ppos) { - int i, count = KGSL_ISTORE_LENGTH, remaining, pos = 0, tot = 0; + int i, count, remaining, pos = 0, tot = 0; struct kgsl_device *device = file->private_data; const int rowc = 8; + struct adreno_device *adreno_dev; if (!ppos || !device) return 0; + adreno_dev = ADRENO_DEVICE(device); + count = adreno_dev->istore_size * adreno_dev->instruction_size; + remaining = count; for (i = 0; i < count; i += rowc) { unsigned int vals[rowc]; @@ -248,7 +251,8 @@ static ssize_t kgsl_istore_read( if (pos >= *ppos) { for (j = 0; j < linec; ++j) kgsl_regread_nolock(device, - KGSL_ISTORE_START+i+j, vals+j); + ADRENO_ISTORE_START + i + j, + vals + j); } else memset(vals, 0, sizeof(vals)); @@ -440,6 +444,8 @@ void adreno_debugfs_init(struct kgsl_device *device) &kgsl_cff_dump_enable_fops); debugfs_create_u32("wait_timeout", 0644, device->d_debugfs, &adreno_dev->wait_timeout); + debugfs_create_u32("ib_check", 0644, device->d_debugfs, + &adreno_dev->ib_check_level); /* Create post mortem control files */ diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c old mode 100644 new mode 100755 index b7b0ea46..fc4789ad --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,8 +17,11 @@ #include "kgsl_sharedmem.h" #include "adreno.h" +#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF + /* quad for copying GMEM to context shadow */ #define QUAD_LEN 12 +#define QUAD_RESTORE_LEN 14 static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000, @@ -27,6 +30,14 @@ static unsigned int gmem_copy_quad[QUAD_LEN] = { 0x00000000, 0x00000000, 0x3f800000 }; +static unsigned int gmem_restore_quad[QUAD_RESTORE_LEN] = { + 0x00000000, 0x3f800000, 0x3f800000, + 0x00000000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x00000000, 0x00000000, + 0x3f800000, 0x3f800000, +}; + #define TEXCOORD_LEN 8 static unsigned int gmem_copy_texcoord[TEXCOORD_LEN] = { @@ -73,12 +84,12 @@ static void set_gmem_copy_quad(struct gmem_shadow_t *shadow) gmem_copy_quad[4] = uint2float(shadow->height); gmem_copy_quad[9] = uint2float(shadow->width); - gmem_copy_quad[0] = 0; - gmem_copy_quad[6] = 0; - gmem_copy_quad[7] = 0; - gmem_copy_quad[10] = 0; + gmem_restore_quad[5] = uint2float(shadow->height); + gmem_restore_quad[7] = uint2float(shadow->width); memcpy(shadow->quad_vertices.hostptr, gmem_copy_quad, QUAD_LEN << 2); + memcpy(shadow->quad_vertices_restore.hostptr, gmem_copy_quad, + QUAD_RESTORE_LEN << 2); memcpy(shadow->quad_texcoords.hostptr, gmem_copy_texcoord, TEXCOORD_LEN << 2); @@ -103,6 +114,13 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, cmd += QUAD_LEN; + /* Used by A3XX, but define for both to make the code easier */ + shadow->quad_vertices_restore.hostptr = cmd; + shadow->quad_vertices_restore.gpuaddr = + virt2gpu(cmd, &drawctxt->gpustate); + + cmd += QUAD_RESTORE_LEN; + /* tex coord buffer location (in GPU space) */ shadow->quad_texcoords.hostptr = cmd; shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate); @@ -138,28 +156,28 @@ int adreno_drawctxt_create(struct kgsl_device *device, drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; + drawctxt->id = context->id; - /* FIXME: Deal with preambles */ + if (flags & KGSL_CONTEXT_PREAMBLE) + drawctxt->flags |= CTXT_FLAGS_PREAMBLE; - ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt); + if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC) + drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; + + if (flags & KGSL_CONTEXT_PER_CONTEXT_TS) + drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS; + + ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt); if (ret) - goto err; - - /* Save the shader instruction memory on context switching */ - drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE; - - if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) { - /* create gmem shadow */ - ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev, - drawctxt); - if (ret != 0) goto err; - } + + kgsl_sharedmem_writel(&device->memstore, + KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts), + KGSL_INIT_REFTIMESTAMP); context->devctxt = drawctxt; return 0; err: - kgsl_sharedmem_free(&drawctxt->gpustate); kfree(drawctxt); return ret; } @@ -179,11 +197,12 @@ void adreno_drawctxt_destroy(struct kgsl_device *device, struct kgsl_context *context) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_context *drawctxt = context->devctxt; + struct adreno_context *drawctxt; - if (drawctxt == NULL) + if (context == NULL) return; + drawctxt = context->devctxt; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { /* no need to save GMEM or 
shader, the context is @@ -261,6 +280,6 @@ void adreno_drawctxt_switch(struct adreno_device *adreno_dev, adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); /* Set the new context */ - adreno_dev->drawctxt_active = drawctxt; adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + adreno_dev->drawctxt_active = drawctxt; } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h old mode 100644 new mode 100755 index 3c3a8536..61198ebd --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -30,12 +30,20 @@ #define CTXT_FLAGS_GMEM_SAVE 0x00000200 /* gmem can be restored from shadow */ #define CTXT_FLAGS_GMEM_RESTORE 0x00000400 +/* preamble packed in cmdbuffer for context switching */ +#define CTXT_FLAGS_PREAMBLE 0x00000800 /* shader must be copied to shadow */ #define CTXT_FLAGS_SHADER_SAVE 0x00002000 /* shader can be restored from shadow */ #define CTXT_FLAGS_SHADER_RESTORE 0x00004000 /* Context has caused a GPU hang */ #define CTXT_FLAGS_GPU_HANG 0x00008000 +/* Specifies there is no need to save GMEM */ +#define CTXT_FLAGS_NOGMEMALLOC 0x00010000 +/* Trash state for context */ +#define CTXT_FLAGS_TRASHSTATE 0x00020000 +/* per context timestamps enabled */ +#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000 struct kgsl_device; struct adreno_device; @@ -46,37 +54,57 @@ struct kgsl_context; struct gmem_shadow_t { struct kgsl_memdesc gmemshadow; /* Shadow buffer address */ - /* 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x - * 256 rows. */ - /* width & height must be a multiples of 32, in case tiled textures - * are used. */ - enum COLORFORMATX format; + /* + * 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x + * 256 rows. 
Width & height must be multiples of 32 in case tiled + * textures are used + */ + + enum COLORFORMATX format; /* Unused on A3XX */ unsigned int size; /* Size of surface used to store GMEM */ unsigned int width; /* Width of surface used to store GMEM */ unsigned int height; /* Height of surface used to store GMEM */ unsigned int pitch; /* Pitch of surface used to store GMEM */ unsigned int gmem_pitch; /* Pitch value used for GMEM */ - unsigned int *gmem_save_commands; - unsigned int *gmem_restore_commands; + unsigned int *gmem_save_commands; /* Unused on A3XX */ + unsigned int *gmem_restore_commands; /* Unused on A3XX */ unsigned int gmem_save[3]; unsigned int gmem_restore[3]; struct kgsl_memdesc quad_vertices; struct kgsl_memdesc quad_texcoords; + struct kgsl_memdesc quad_vertices_restore; }; struct adreno_context { + unsigned int id; uint32_t flags; struct kgsl_pagetable *pagetable; struct kgsl_memdesc gpustate; - unsigned int reg_save[3]; unsigned int reg_restore[3]; unsigned int shader_save[3]; - unsigned int shader_fixup[3]; unsigned int shader_restore[3]; - unsigned int chicken_restore[3]; - unsigned int bin_base_offset; + /* Information of the GMEM shadow that is created in context create */ struct gmem_shadow_t context_gmem_shadow; + + /* A2XX specific items */ + unsigned int reg_save[3]; + unsigned int shader_fixup[3]; + unsigned int chicken_restore[3]; + unsigned int bin_base_offset; + + /* A3XX specific items */ + unsigned int regconstant_save[3]; + unsigned int constant_restore[3]; + unsigned int hlsqcontrol_restore[3]; + unsigned int save_fixup[3]; + unsigned int restore_fixup[3]; + struct kgsl_memdesc shader_load_commands[2]; + struct kgsl_memdesc shader_save_commands[4]; + struct kgsl_memdesc constant_save_commands[3]; + struct kgsl_memdesc constant_load_commands[3]; + struct kgsl_memdesc cond_execs[4]; + struct kgsl_memdesc hlsqcontrol_restore_commands[1]; }; int adreno_drawctxt_create(struct kgsl_device *device, diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h old mode 100644 new mode 100755 index 8aea58c9..9340f691 --- a/drivers/gpu/msm/adreno_pm4types.h +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -29,11 +29,6 @@ /* skip N 32-bit words to get to the next packet */ #define CP_NOP 0x10 -/* indirect buffer dispatch. prefetch parser uses this packet type to determine -* whether to pre-fetch the IB -*/ -#define CP_INDIRECT_BUFFER 0x3f - /* indirect buffer dispatch. 
same as IB, but init is pipelined */ #define CP_INDIRECT_BUFFER_PFD 0x37 @@ -117,6 +112,9 @@ /* load constants from a location in memory */ #define CP_LOAD_CONSTANT_CONTEXT 0x2e +/* (A2x) sets binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + /* selective invalidation of state pointers */ #define CP_INVALIDATE_STATE 0x3b @@ -157,6 +155,25 @@ #define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 /* packet header building macros */ #define cp_type0_packet(regindx, cnt) \ @@ -178,11 +195,20 @@ #define cp_nop_packet(cnt) \ (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8)) +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +#define pkt_is_type3(pkt) (((pkt) & 0xC0000000) == CP_TYPE3_PKT) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) /* packet headers */ #define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18) #define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) -#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2) +#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) /* dword base address of the GFX decode space */ #define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) @@ -190,4 +216,14 @@ /* gmem command buffer length */ #define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(unsigned int cmd) +{ + return (cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFE, 2) || + cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFD, 2)); +} + #endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c old mode 100644 new mode 100755 index 3d957f69..427741f1 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -14,6 +14,7 @@ #include #include "kgsl.h" +#include "kgsl_sharedmem.h" #include "adreno.h" #include "adreno_pm4types.h" @@ -52,7 +53,7 @@ static const struct pm_id_name pm3_types[] = { {CP_IM_LOAD, "IN__LOAD"}, {CP_IM_LOAD_IMMEDIATE, "IM_LOADI"}, {CP_IM_STORE, "IM_STORE"}, - {CP_INDIRECT_BUFFER, "IND_BUF_"}, + {CP_INDIRECT_BUFFER_PFE, "IND_BUF_"}, {CP_INDIRECT_BUFFER_PFD, "IND_BUFP"}, {CP_INTERRUPT, "PM4_INTR"}, {CP_INVALIDATE_STATE, "INV_STAT"}, @@ -247,9 +248,8 @@ static void adreno_dump_regs(struct kgsl_device *device, static void dump_ib(struct kgsl_device *device, char* buffId, uint32_t pt_base, uint32_t base_offset, uint32_t ib_base, uint32_t ib_size, bool dump) { - unsigned int memsize; - uint8_t *base_addr = kgsl_sharedmem_convertaddr(device, 
pt_base, - ib_base, &memsize); + uint8_t *base_addr = adreno_convertaddr(device, pt_base, + ib_base, ib_size*sizeof(uint32_t)); if (base_addr && dump) print_hex_dump(KERN_ERR, buffId, DUMP_PREFIX_OFFSET, @@ -277,20 +277,19 @@ static void dump_ib1(struct kgsl_device *device, uint32_t pt_base, int i, j; uint32_t value; uint32_t *ib1_addr; - unsigned int memsize; dump_ib(device, "IB1:", pt_base, base_offset, ib1_base, ib1_size, dump); /* fetch virtual address for given IB base */ - ib1_addr = (uint32_t *)kgsl_sharedmem_convertaddr(device, pt_base, - ib1_base, &memsize); + ib1_addr = (uint32_t *)adreno_convertaddr(device, pt_base, + ib1_base, ib1_size*sizeof(uint32_t)); if (!ib1_addr) return; for (i = 0; i+3 < ib1_size; ) { value = ib1_addr[i++]; - if (value == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(value)) { uint32_t ib2_base = ib1_addr[i++]; uint32_t ib2_size = ib1_addr[i++]; @@ -466,7 +465,9 @@ static int adreno_dump(struct kgsl_device *device) const uint32_t *rb_vaddr; int num_item = 0; int read_idx, write_idx; - unsigned int ts_processed, rb_memsize; + unsigned int ts_processed = 0xdeaddead; + struct kgsl_context *context; + unsigned int context_id; static struct ib_list ib_list; @@ -662,9 +663,18 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "MH_INTERRUPT: MASK = %08X | STATUS = %08X\n", r1, r2); - ts_processed = device->ftbl->readtimestamp(device, + kgsl_sharedmem_readl(&device->memstore, + (unsigned int *) &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + context = idr_find(&device->context_idr, context_id); + if (context) { + ts_processed = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); - KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed); + KGSL_LOG_DUMP(device, "CTXT: %d TIMESTM RTRD: %08X\n", + context->id, ts_processed); + } else + KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id); num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer, cp_rb_rptr); @@ -681,11 +691,16 @@ static int adreno_dump(struct kgsl_device *device) KGSL_LOG_DUMP(device, "RB: rd_addr:%8.8x rb_size:%d num_item:%d\n", cp_rb_base, rb_count<<2, num_item); - rb_vaddr = (const uint32_t *)kgsl_sharedmem_convertaddr(device, - cur_pt_base, cp_rb_base, &rb_memsize); + + if (adreno_dev->ringbuffer.buffer_desc.gpuaddr != cp_rb_base) + KGSL_LOG_POSTMORTEM_WRITE(device, + "rb address mismatch, should be 0x%08x\n", + adreno_dev->ringbuffer.buffer_desc.gpuaddr); + + rb_vaddr = adreno_dev->ringbuffer.buffer_desc.hostptr; if (!rb_vaddr) { KGSL_LOG_POSTMORTEM_WRITE(device, - "Can't fetch vaddr for CP_RB_BASE\n"); + "rb has no kernel mapping!\n"); goto error_vfree; } @@ -711,7 +726,7 @@ static int adreno_dump(struct kgsl_device *device) i = 0; for (read_idx = 0; read_idx < num_item; ) { uint32_t this_cmd = rb_copy[read_idx++]; - if (this_cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx++]; uint32_t ib_size = rb_copy[read_idx++]; dump_ib1(device, cur_pt_base, (read_idx-3)<<2, ib_addr, @@ -743,8 +758,7 @@ static int adreno_dump(struct kgsl_device *device) for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY; read_idx >= 0; --read_idx) { uint32_t this_cmd = rb_copy[read_idx]; - if (this_cmd == cp_type3_packet( - CP_INDIRECT_BUFFER_PFD, 2)) { + if (adreno_cmd_is_ib(this_cmd)) { uint32_t ib_addr = rb_copy[read_idx+1]; uint32_t ib_size = rb_copy[read_idx+2]; if (ib_size && cp_ib1_base == ib_addr) { diff --git 
a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c old mode 100644 new mode 100755 index d59057c8..da80576f --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -22,30 +22,14 @@ #include "adreno.h" #include "adreno_pm4types.h" #include "adreno_ringbuffer.h" +#include "adreno_debugfs.h" #include "a2xx_reg.h" +#include "a3xx_reg.h" #define GSL_RB_NOP_SIZEDWORDS 2 -/* protected mode error checking below register address 0x800 -* note: if CP_INTERRUPT packet is used then checking needs -* to change to below register address 0x7C8 -*/ -#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 -/* Firmware file names - * Legacy names must remain but replacing macro names to - * match current kgsl model. - * a200 is yamato - * a220 is leia - */ -#define A200_PFP_FW "yamato_pfp.fw" -#define A200_PM4_FW "yamato_pm4.fw" -#define A220_PFP_470_FW "leia_pfp_470.fw" -#define A220_PM4_470_FW "leia_pm4_470.fw" -#define A225_PFP_FW "a225_pfp.fw" -#define A225_PM4_FW "a225_pm4.fw" - -static void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb) { BUG_ON(rb->wptr == 0); @@ -104,8 +88,7 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds, } while ((freecmds != 0) && (freecmds <= numcmds)); } - -static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, unsigned int numcmds) { unsigned int *ptr = NULL; @@ -231,9 +214,10 @@ static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n", adreno_dev->pfp_fw[0]); - adreno_regwrite(device, REG_CP_PFP_UCODE_ADDR, 0); + adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0); for (i = 1; i < adreno_dev->pfp_fw_size; i++) - adreno_regwrite(device, REG_CP_PFP_UCODE_DATA, + adreno_regwrite(device, + adreno_dev->gpudev->reg_cp_pfp_ucode_data, adreno_dev->pfp_fw[i]); err: return ret; @@ -244,15 +228,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) int status; /*cp_rb_cntl_u cp_rb_cntl; */ union reg_cp_rb_cntl cp_rb_cntl; - unsigned int *cmds, rb_cntl; + unsigned int rb_cntl; struct kgsl_device *device = rb->device; - uint cmds_gpu; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); if (rb->flags & KGSL_FLAGS_STARTED) return 0; if (init_ram) { - rb->timestamp = 0; + rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0; GSL_RB_INIT_TIMESTAMP(rb); } @@ -262,12 +246,15 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) kgsl_sharedmem_set(&rb->buffer_desc, 0, 0xAA, (rb->sizedwords << 2)); + if (adreno_is_a2xx(adreno_dev)) { adreno_regwrite(device, REG_CP_RB_WPTR_BASE, (rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_WPTRPOLL_OFFSET)); /* setup WPTR delay */ - adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, 0 /*0x70000010 */); + adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, + 0 /*0x70000010 */); + } /*setup REG_CP_RB_CNTL */ adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl); @@ -286,7 +273,11 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) */ cp_rb_cntl.f.rb_blksz = 
ilog2(KGSL_RB_BLKSIZE >> 3); - cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; /* WPTR polling */ + if (adreno_is_a2xx(adreno_dev)) { + /* WPTR polling */ + cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; + } + /* mem RPTR writebacks */ cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE; @@ -298,13 +289,41 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) rb->memptrs_desc.gpuaddr + GSL_RB_MEMPTRS_RPTR_OFFSET); + if (adreno_is_a3xx(adreno_dev)) { + /* enable access protection to privileged registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_3, 0x60000108); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400); + + /* CP registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178); + adreno_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180); + + /* RB registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300); + + /* VBIF registers */ + adreno_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000); + } + + if (adreno_is_a2xx(adreno_dev)) { /* explicitly clear all cp interrupts */ adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF); + } /* setup scratch/timestamp */ - adreno_regwrite(device, REG_SCRATCH_ADDR, - device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp)); + adreno_regwrite(device, REG_SCRATCH_ADDR, device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + soptimestamp)); adreno_regwrite(device, REG_SCRATCH_UMSK, GSL_RB_MEMPTRS_SCRATCH_MASK); @@ -328,54 +347,8 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) /* clear ME_HALT to start micro engine */ adreno_regwrite(device, REG_CP_ME_CNTL, 0); - /* ME_INIT */ - cmds = adreno_ringbuffer_allocspace(rb, 19); - cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); - - GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT); - /* All fields present (bits 9:0) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff); - /* Disable/Enable Real-Time Stream processing (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL)); - GSL_RB_WRITE(cmds, cmds_gpu, - SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE)); - - /* Vertex and Pixel Shader Start Addresses in instructions - * (3 DWORDS per instruction) */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x80000180); - /* Maximum Contexts */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001); - /* Write Confirm Interval and The CP will wait the - * 
wait_interval * 16 clocks between polling */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - /* NQ and External Memory Swap */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Protected mode error checking */ - GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL); - /* Disable header dumping and Header dump address */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - /* Header dump size */ - GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); - - adreno_ringbuffer_submit(rb); + /* ME init is GPU specific, so jump into the sub-function */ + adreno_dev->gpudev->rb_init(adreno_dev, rb); /* idle device to validate ME INIT */ status = adreno_idle(device, KGSL_TIMEOUT_DEFAULT); @@ -391,7 +364,6 @@ void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb) if (rb->flags & KGSL_FLAGS_STARTED) { /* ME_HALT */ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000); - rb->flags &= ~KGSL_FLAGS_STARTED; } } @@ -454,14 +426,28 @@ void adreno_ringbuffer_close(struct adreno_ringbuffer *rb) static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + struct adreno_context *context, unsigned int flags, unsigned int *cmds, int sizedwords) { + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; - unsigned int total_sizedwords = sizedwords + 6; + unsigned int total_sizedwords = sizedwords; unsigned int i; unsigned int rcmd_gpu; + unsigned int context_id = KGSL_MEMSTORE_GLOBAL; + unsigned int gpuaddr = rb->device->memstore.gpuaddr; + + if (context != NULL) { + /* + * if the context was not created with per context timestamp + * support, we must use the global timestamp since issueibcmds + * will be returning that one. + */ + if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) + context_id = context->id; + } /* reserve space to temporarily turn off protected mode * error checking if needed @@ -470,6 +456,16 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0; + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 7; + + total_sizedwords += 2; /* scratchpad ts for recovery */ + if (context) { + total_sizedwords += 3; /* sop timestamp */ + total_sizedwords += 4; /* eop timestamp */ + } + total_sizedwords += 4; /* global timestamp for recovery*/ + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); @@ -497,28 +493,70 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } - rb->timestamp++; - timestamp = rb->timestamp; + /* always increment the global timestamp. once. 
*/ + rb->timestamp[KGSL_MEMSTORE_GLOBAL]++; + if (context) { + if (context_id == KGSL_MEMSTORE_GLOBAL) + rb->timestamp[context_id] = + rb->timestamp[KGSL_MEMSTORE_GLOBAL]; + else + rb->timestamp[context_id]++; + } + timestamp = rb->timestamp[context_id]; - /* start-of-pipeline and end-of-pipeline timestamps */ + /* scratchpad ts for recovery */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); + + if (adreno_is_a3xx(adreno_dev)) { + /* + * FLush HLSQ lazy updates to make sure there are no + * rsources pending for indirect loads after the timestamp + */ + + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); + } + + if (context) { + /* start-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_MEM_WRITE, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + + /* end-of-pipeline timestamp */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_EVENT_WRITE, 3)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); + } + GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); - GSL_RB_WRITE(ringcmds, rcmd_gpu, - (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp))); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + eoptimestamp))); + GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]); if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_COND_EXEC, 4)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ts_cmp_enable)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + + KGSL_MEMSTORE_OFFSET( + context_id, ref_wait_ts)) >> 2); + GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -526,9 +564,17 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_SET_CONSTANT, 2)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, + (0x4<<16)|(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); + } + adreno_ringbuffer_submit(rb); - /* return timestamp of issued coREG_ands */ return timestamp; } @@ -543,7 +589,199 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, if (device->state & KGSL_STATE_HUNG) return; - adreno_ringbuffer_addcmds(rb, flags, cmds, 
sizedwords); + adreno_ringbuffer_addcmds(rb, NULL, flags, cmds, sizedwords); +} + +static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, + int sizedwords); + +static bool +_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int opcode = cp_type3_opcode(*hostaddr); + switch (opcode) { + case CP_INDIRECT_BUFFER_PFD: + case CP_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFE: + case CP_COND_INDIRECT_BUFFER_PFD: + return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]); + case CP_NOP: + case CP_WAIT_FOR_IDLE: + case CP_WAIT_REG_MEM: + case CP_WAIT_REG_EQ: + case CP_WAT_REG_GTE: + case CP_WAIT_UNTIL_READ: + case CP_WAIT_IB_PFD_COMPLETE: + case CP_REG_RMW: + case CP_REG_TO_MEM: + case CP_MEM_WRITE: + case CP_MEM_WRITE_CNTR: + case CP_COND_EXEC: + case CP_COND_WRITE: + case CP_EVENT_WRITE: + case CP_EVENT_WRITE_SHD: + case CP_EVENT_WRITE_CFL: + case CP_EVENT_WRITE_ZPD: + case CP_DRAW_INDX: + case CP_DRAW_INDX_2: + case CP_DRAW_INDX_BIN: + case CP_DRAW_INDX_2_BIN: + case CP_VIZ_QUERY: + case CP_SET_STATE: + case CP_SET_CONSTANT: + case CP_IM_LOAD: + case CP_IM_LOAD_IMMEDIATE: + case CP_LOAD_CONSTANT_CONTEXT: + case CP_INVALIDATE_STATE: + case CP_SET_SHADER_BASES: + case CP_SET_BIN_MASK: + case CP_SET_BIN_SELECT: + case CP_SET_BIN_BASE_OFFSET: + case CP_SET_BIN_DATA: + case CP_CONTEXT_UPDATE: + case CP_INTERRUPT: + case CP_IM_STORE: + case CP_LOAD_STATE: + break; + /* these shouldn't come from userspace */ + case CP_ME_INIT: + case CP_SET_PROTECTED_MODE: + default: + KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode); + return false; + break; + } + + return true; +} + +static bool +_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr) +{ + unsigned int reg = type0_pkt_offset(*hostaddr); + unsigned int cnt = type0_pkt_size(*hostaddr); + if (reg < 0x0192 || (reg + cnt) >= 0x8000) { + KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n", + reg, cnt); + return false; + } + return true; +} + +/* + * Traverse IBs and dump them to test vector. 
Detect swap by inspecting + * register writes, keeping note of the current state, and dump + * framebuffer config to test vector + */ +static bool _parse_ibs(struct kgsl_device_private *dev_priv, + uint gpuaddr, int sizedwords) +{ + static uint level; /* recursion level */ + bool ret = false; + uint *hostaddr, *hoststart; + int dwords_left = sizedwords; /* dwords left in the current command + buffer */ + struct kgsl_mem_entry *entry; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find_region(dev_priv->process_priv, + gpuaddr, sizedwords * sizeof(uint)); + spin_unlock(&dev_priv->process_priv->mem_lock); + if (entry == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (hostaddr == NULL) { + KGSL_CMD_ERR(dev_priv->device, + "no mapping for gpuaddr: 0x%08x\n", gpuaddr); + return false; + } + + hoststart = hostaddr; + + level++; + + KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", + gpuaddr, sizedwords, hostaddr); + + mb(); + while (dwords_left > 0) { + bool cur_ret = true; + int count = 0; /* dword count including packet header */ + + switch (*hostaddr >> 30) { + case 0x0: /* type-0 */ + count = (*hostaddr >> 16)+2; + cur_ret = _handle_type0(dev_priv, hostaddr); + break; + case 0x1: /* type-1 */ + count = 2; + break; + case 0x3: /* type-3 */ + count = ((*hostaddr >> 16) & 0x3fff) + 2; + cur_ret = _handle_type3(dev_priv, hostaddr); + break; + default: + KGSL_CMD_ERR(dev_priv->device, "unexpected type: " + "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", + *hostaddr >> 30, *hostaddr, hostaddr, + gpuaddr+4*(sizedwords-dwords_left)); + cur_ret = false; + count = dwords_left; + break; + } + + if (!cur_ret) { + KGSL_CMD_ERR(dev_priv->device, + "bad sub-type: #:%d/%d, v:0x%08x" + " @ 0x%p[gb:0x%08x], level:%d\n", + sizedwords-dwords_left, sizedwords, *hostaddr, + hostaddr, gpuaddr+4*(sizedwords-dwords_left), + level); + + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + + /* jump to next packet */ + dwords_left -= count; + hostaddr += count; + if (dwords_left < 0) { + KGSL_CMD_ERR(dev_priv->device, + "bad count: c:%d, #:%d/%d, " + "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", + count, sizedwords-(dwords_left+count), + sizedwords, *(hostaddr-count), hostaddr-count, + gpuaddr+4*(sizedwords-(dwords_left+count)), + level); + if (ADRENO_DEVICE(dev_priv->device)->ib_check_level + >= 2) + print_hex_dump(KERN_ERR, + level == 1 ? "IB1:" : "IB2:", + DUMP_PREFIX_OFFSET, 32, 4, hoststart, + sizedwords*4, 0); + goto done; + } + } + + ret = true; +done: + if (!ret) + KGSL_DRV_ERR(dev_priv->device, + "parsing failed: gpuaddr:0x%08x, " + "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); + + level--; + + return ret; } int @@ -560,6 +798,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, unsigned int *cmds; unsigned int i; struct adreno_context *drawctxt; + unsigned int start_index = 0; if (device->state & KGSL_STATE_HUNG) return -EBUSY; @@ -571,26 +810,52 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) { KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.." 
- " will not accept commands for this context\n", - drawctxt); + " will not accept commands for context %d\n", + drawctxt, drawctxt->id); return -EDEADLK; } - link = kzalloc(sizeof(unsigned int) * numibs * 3, GFP_KERNEL); - cmds = link; + + cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4), + GFP_KERNEL); if (!link) { - KGSL_MEM_ERR(device, "Failed to allocate memory for for command" - " submission, size %x\n", numibs * 3); + KGSL_CORE_ERR("kzalloc(%d) failed\n", + sizeof(unsigned int) * (numibs * 3 + 4)); return -ENOMEM; } - for (i = 0; i < numibs; i++) { - (void)kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false); + /*When preamble is enabled, the preamble buffer with state restoration + commands are stored in the first node of the IB chain. We can skip that + if a context switch hasn't occured */ + + if (drawctxt->flags & CTXT_FLAGS_PREAMBLE && + adreno_dev->drawctxt_active == drawctxt) + start_index = 1; + + if (!start_index) { + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + } else { + *cmds++ = cp_nop_packet(4); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; + *cmds++ = ibdesc[0].gpuaddr; + *cmds++ = ibdesc[0].sizedwords; + } + for (i = start_index; i < numibs; i++) { + if (unlikely(adreno_dev->ib_check_level >= 1 && + !_parse_ibs(dev_priv, ibdesc[i].gpuaddr, + ibdesc[i].sizedwords))) { + kfree(link); + return -EINVAL; + } *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; } + *cmds++ = cp_nop_packet(1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + kgsl_setstate(device, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); @@ -598,6 +863,7 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, adreno_drawctxt_switch(adreno_dev, drawctxt, flags); *timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer, + drawctxt, KGSL_CMD_FLAGS_NOT_KERNEL_CMD, &link[0], (cmds - link)); @@ -631,12 +897,25 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, unsigned int val2; unsigned int val3; unsigned int copy_rb_contents = 0; - unsigned int cur_context; - unsigned int j; + struct kgsl_context *context; + unsigned int context_id; GSL_RB_GET_READPTR(rb, &rb->rptr); - retired_timestamp = device->ftbl->readtimestamp(device, + /* current_context is the context that is presently active in the + * GPU, i.e the context in which the hang is caused */ + kgsl_sharedmem_readl(&device->memstore, &context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + KGSL_DRV_ERR(device, "Last context id: %d\n", context_id); + context = idr_find(&device->context_idr, context_id); + if (context == NULL) { + KGSL_DRV_ERR(device, + "GPU recovery from hang not possible because last" + " context id is invalid.\n"); + return -EINVAL; + } + retired_timestamp = device->ftbl->readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED); KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n", retired_timestamp); @@ -671,7 +950,8 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, (val1 == cp_type3_packet(CP_EVENT_WRITE, 3) && val2 == CACHE_FLUSH_TS && val3 == (rb->device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)))) { + KGSL_MEMSTORE_OFFSET(context_id, + eoptimestamp)))) { rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); KGSL_DRV_ERR(device, @@ -717,10 +997,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, return -EINVAL; } - /* current_context is the 
context that is presently active in the - * GPU, i.e the context in which the hang is caused */ - kgsl_sharedmem_readl(&device->memstore, &cur_context, - KGSL_DEVICE_MEMSTORE_OFFSET(current_context)); while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) { kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, @@ -735,12 +1011,25 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); BUG_ON(val1 != (device->memstore.gpuaddr + - KGSL_DEVICE_MEMSTORE_OFFSET(current_context))); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context))); kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr); rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, rb->buffer_desc.size); - BUG_ON((copy_rb_contents == 0) && - (value == cur_context)); + + /* + * If other context switches were already lost and + * and the current context is the one that is hanging, + * then we cannot recover. Print an error message + * and leave. + */ + + if ((copy_rb_contents == 0) && (value == context_id)) { + KGSL_DRV_ERR(device, "GPU recovery could not " + "find the previous context\n"); + return -EINVAL; + } + /* * If we were copying the commands and got to this point * then we need to remove the 3 commands that appear @@ -751,7 +1040,7 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, /* if context switches to a context that did not cause * hang then start saving the rb contents as those * commands can be executed */ - if (value != cur_context) { + if (value != context_id) { copy_rb_contents = 1; temp_rb_buffer[temp_idx++] = cp_nop_packet(1); temp_rb_buffer[temp_idx++] = @@ -771,19 +1060,6 @@ int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, } *rb_size = temp_idx; - KGSL_DRV_ERR(device, "Extracted rb contents, size: %x\n", *rb_size); - for (temp_idx = 0; temp_idx < *rb_size;) { - char str[80]; - int idx = 0; - if ((temp_idx + 8) <= *rb_size) - j = 8; - else - j = *rb_size - temp_idx; - for (; j != 0; j--) - idx += scnprintf(str + idx, 80 - idx, - "%8.8X ", temp_rb_buffer[temp_idx++]); - printk(KERN_ALERT "%s", str); - } return 0; } diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h old mode 100644 new mode 100755 index 3e7a6880..bfac915c --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -14,10 +13,6 @@ #ifndef __ADRENO_RINGBUFFER_H #define __ADRENO_RINGBUFFER_H -#define GSL_RB_USE_MEM_RPTR -#define GSL_RB_USE_MEM_TIMESTAMP -#define GSL_DEVICE_SHADOW_MEMSTORE_TO_USER - /* * Adreno ringbuffer sizes in bytes - these are converted to * the appropriate log2 values in the code @@ -59,52 +54,39 @@ struct adreno_ringbuffer { unsigned int wptr; /* write pointer offset in dwords from baseaddr */ unsigned int rptr; /* read pointer offset in dwords from baseaddr */ - uint32_t timestamp; + + unsigned int timestamp[KGSL_MEMSTORE_MAX]; }; #define GSL_RB_WRITE(ring, gpuaddr, data) \ do { \ - writel_relaxed(data, ring); \ + *ring = data; \ wmb(); \ kgsl_cffdump_setmem(gpuaddr, data, 4); \ ring++; \ gpuaddr += sizeof(uint); \ } while (0) -/* timestamp */ -#ifdef GSL_DEVICE_SHADOW_MEMSTORE_TO_USER -#define GSL_RB_USE_MEM_TIMESTAMP -#endif /* GSL_DEVICE_SHADOW_MEMSTORE_TO_USER */ - -#ifdef GSL_RB_USE_MEM_TIMESTAMP /* enable timestamp (...scratch0) memory shadowing */ #define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 #define GSL_RB_INIT_TIMESTAMP(rb) -#else -#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x0 -#define GSL_RB_INIT_TIMESTAMP(rb) \ - adreno_regwrite((rb)->device->id, REG_CP_TIMESTAMP, 0) - -#endif /* GSL_RB_USE_MEMTIMESTAMP */ - /* mem rptr */ -#ifdef GSL_RB_USE_MEM_RPTR #define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */ #define GSL_RB_GET_READPTR(rb, data) \ do { \ - *(data) = readl_relaxed(&(rb)->memptrs->rptr); \ + *(data) = rb->memptrs->rptr; \ } while (0) -#else -#define GSL_RB_CNTL_NO_UPDATE 0x1 /* disable */ -#define GSL_RB_GET_READPTR(rb, data) \ - do { \ - adreno_regread((rb)->device->id, REG_CP_RB_RPTR, (data)); \ - } while (0) -#endif /* GSL_RB_USE_MEMRPTR */ #define GSL_RB_CNTL_POLL_EN 0x0 /* disable */ +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, @@ -126,6 +108,8 @@ void adreno_ringbuffer_issuecmds(struct kgsl_device *device, unsigned int *cmdaddr, int sizedwords); +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb); + void kgsl_cp_intrcallback(struct kgsl_device *device); int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, @@ -136,6 +120,9 @@ void adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, int num_rb_contents); +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, unsigned int rptr) { diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c old mode 100644 new mode 100755 index e21ca09c..491944ba --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,6 +24,7 @@ #include #include +#include #include "kgsl.h" #include "kgsl_debugfs.h" @@ -45,7 +45,7 @@ module_param_named(mmutype, ksgl_mmu_type, charp, 0); MODULE_PARM_DESC(ksgl_mmu_type, "Type of MMU to be used for graphics. Valid values are 'iommu' or 'gpummu' or 'nommu'"); -#ifdef CONFIG_GENLOCK +static struct ion_client *kgsl_ion_client; /** * kgsl_add_event - Add a new timstamp event for the KGSL device @@ -53,25 +53,35 @@ MODULE_PARM_DESC(ksgl_mmu_type, * @ts - the timestamp to trigger the event on * @cb - callback function to call when the timestamp expires * @priv - private data for the specific event type + * @owner - driver instance that owns this event * * @returns - 0 on success or error code on failure */ -static int kgsl_add_event(struct kgsl_device *device, u32 ts, - void (*cb)(struct kgsl_device *, void *, u32), void *priv) +static int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, + void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv, + struct kgsl_device_private *owner) { struct kgsl_event *event; struct list_head *n; - unsigned int cur = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); + unsigned int cur_ts; + struct kgsl_context *context = NULL; if (cb == NULL) return -EINVAL; + if (id != KGSL_MEMSTORE_GLOBAL) { + context = idr_find(&device->context_idr, id); + if (context == NULL) + return -EINVAL; + } + cur_ts = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + /* Check to see if the requested timestamp has already fired */ - if (timestamp_cmp(cur, ts) >= 0) { - cb(device, priv, cur); + if (timestamp_cmp(cur_ts, ts) >= 0) { + cb(device, priv, id, cur_ts); return 0; } @@ -79,16 +89,24 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (event == NULL) return -ENOMEM; + event->context = context; event->timestamp = ts; event->priv = priv; event->func = cb; + event->owner = owner; - /* Add the event in order to the list */ + /* + * Add the event in order to the list. Order is by context id + * first and then by timestamp for that context. + */ for (n = device->events.next ; n != &device->events; n = n->next) { struct kgsl_event *e = list_entry(n, struct kgsl_event, list); + if (e->context != context) + continue; + if (timestamp_cmp(e->timestamp, ts) > 0) { list_add(&event->list, n->prev); break; @@ -97,10 +115,77 @@ static int kgsl_add_event(struct kgsl_device *device, u32 ts, if (n == &device->events) list_add_tail(&event->list, &device->events); - + + queue_work(device->work_queue, &device->ts_expired_ws); return 0; } -#endif + +/** + * kgsl_cancel_events - Cancel all events for a process + * @device - KGSL device for the events to cancel + * @owner - driver instance that owns the events to cancel + * + */ +static void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_device_private *owner) +{ + struct kgsl_event *event, *event_tmp; + unsigned int id, cur; + + list_for_each_entry_safe(event, event_tmp, &device->events, list) { + if (event->owner != owner) + continue; + + cur = device->ftbl->readtimestamp(device, event->context, + KGSL_TIMESTAMP_RETIRED); + + id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL; + /* + * "cancel" the events by calling their callback. + * Currently, events are used for lock and memory + * management, so if the process is dying the right + * thing to do is release or free. 
+ */ + if (event->func) + event->func(device, event->priv, id, cur); + + list_del(&event->list); + kfree(event); + } +} + +/* kgsl_get_mem_entry - get the mem_entry structure for the specified object + * @ptbase - the pagetable base of the object + * @gpuaddr - the GPU address of the object + * @size - Size of the region to search + */ + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size) +{ + struct kgsl_process_private *priv; + struct kgsl_mem_entry *entry; + + mutex_lock(&kgsl_driver.process_mutex); + + list_for_each_entry(priv, &kgsl_driver.process_list, list) { + if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase)) + continue; + spin_lock(&priv->mem_lock); + entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); + + if (entry) { + spin_unlock(&priv->mem_lock); + mutex_unlock(&kgsl_driver.process_mutex); + return entry; + } + spin_unlock(&priv->mem_lock); + } + mutex_unlock(&kgsl_driver.process_mutex); + + return NULL; +} +EXPORT_SYMBOL(kgsl_get_mem_entry); static inline struct kgsl_mem_entry * kgsl_mem_entry_create(void) @@ -121,18 +206,32 @@ kgsl_mem_entry_destroy(struct kref *kref) struct kgsl_mem_entry *entry = container_of(kref, struct kgsl_mem_entry, refcount); - size_t size = entry->memdesc.size; + + if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) + kgsl_driver.stats.mapped -= entry->memdesc.size; + + /* + * Ion takes care of freeing the sglist for us (how nice ) so + * unmap the dma before freeing the sharedmem so kgsl_sharedmem_free + * doesn't try to free it again + */ + + if (entry->memtype == KGSL_MEM_ENTRY_ION) { + ion_unmap_dma(kgsl_ion_client, entry->priv_data); + entry->memdesc.sg = NULL; + } kgsl_sharedmem_free(&entry->memdesc); - if (entry->memtype == KGSL_USER_MEMORY) - entry->priv->stats.user -= size; - else if (entry->memtype == KGSL_MAPPED_MEMORY) { - if (entry->file_ptr) - fput(entry->file_ptr); - - kgsl_driver.stats.mapped -= size; - entry->priv->stats.mapped -= size; + switch (entry->memtype) { + case KGSL_MEM_ENTRY_PMEM: + case KGSL_MEM_ENTRY_ASHMEM: + if (entry->priv_data) + fput(entry->priv_data); + break; + case KGSL_MEM_ENTRY_ION: + ion_free(kgsl_ion_client, entry->priv_data); + break; } kfree(entry); @@ -150,6 +249,21 @@ void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, entry->priv = process; } +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + + entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; + entry->priv = NULL; + + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_put(entry); +} + /* Allocate a new context id */ static struct kgsl_context * @@ -170,8 +284,8 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } - ret = idr_get_new(&dev_priv->device->context_idr, - context, &id); + ret = idr_get_new_above(&dev_priv->device->context_idr, + context, 1, &id); if (ret != -EAGAIN) break; @@ -182,6 +296,16 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) return NULL; } + /* MAX - 1, there is one memdesc in memstore for device info */ + if (id >= KGSL_MEMSTORE_MAX) { + KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d " + "ctxts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + idr_remove(&dev_priv->device->context_idr, id); + kfree(context); + return NULL; + } + context->id = id; context->dev_priv = dev_priv; @@ -206,74 +330,34 @@ kgsl_destroy_context(struct kgsl_device_private 
*dev_priv, idr_remove(&dev_priv->device->context_idr, id); } -/* to be called when a process is destroyed, this walks the memqueue and - * frees any entryies that belong to the dying process - */ -static void kgsl_memqueue_cleanup(struct kgsl_device *device, - struct kgsl_process_private *private) -{ - struct kgsl_mem_entry *entry, *entry_tmp; - - if (!private) - return; - - BUG_ON(!mutex_is_locked(&device->mutex)); - - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (entry->priv == private) { - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - } -} - -static void kgsl_memqueue_freememontimestamp(struct kgsl_device *device, - struct kgsl_mem_entry *entry, - uint32_t timestamp, - enum kgsl_timestamp_type type) -{ - BUG_ON(!mutex_is_locked(&device->mutex)); - - entry->free_timestamp = timestamp; - - list_add_tail(&entry->list, &device->memqueue); -} - static void kgsl_timestamp_expired(struct work_struct *work) { struct kgsl_device *device = container_of(work, struct kgsl_device, ts_expired_ws); - struct kgsl_mem_entry *entry, *entry_tmp; struct kgsl_event *event, *event_tmp; uint32_t ts_processed; + unsigned int id; mutex_lock(&device->mutex); - /* get current EOP timestamp */ - ts_processed = device->ftbl->readtimestamp(device, - KGSL_TIMESTAMP_RETIRED); - - /* Flush the freememontimestamp queue */ - list_for_each_entry_safe(entry, entry_tmp, &device->memqueue, list) { - if (timestamp_cmp(ts_processed, entry->free_timestamp) < 0) - break; - - list_del(&entry->list); - kgsl_mem_entry_put(entry); - } - /* Process expired events */ list_for_each_entry_safe(event, event_tmp, &device->events, list) { + ts_processed = device->ftbl->readtimestamp(device, + event->context, KGSL_TIMESTAMP_RETIRED); if (timestamp_cmp(ts_processed, event->timestamp) < 0) - break; + continue; + + id = event->context ? 
event->context->id : KGSL_MEMSTORE_GLOBAL; if (event->func) - event->func(device, event->priv, ts_processed); + event->func(device, event->priv, id, ts_processed); list_del(&event->list); kfree(event); } + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + mutex_unlock(&device->mutex); } @@ -348,11 +432,15 @@ int kgsl_unregister_ts_notifier(struct kgsl_device *device, } EXPORT_SYMBOL(kgsl_unregister_ts_notifier); -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp) +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) { unsigned int ts_processed; + unsigned int global; - ts_processed = device->ftbl->readtimestamp(device, + ts_processed = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + global = device->ftbl->readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED); return (timestamp_cmp(ts_processed, timestamp) >= 0); @@ -585,18 +673,13 @@ kgsl_put_process_private(struct kgsl_device *device, if (--private->refcnt) goto unlock; - KGSL_MEM_INFO(device, - "Memory usage: user (%d/%d) mapped (%d/%d)\n", - private->stats.user, private->stats.user_max, - private->stats.mapped, private->stats.mapped_max); - kgsl_process_uninit_sysfs(private); list_del(&private->list); list_for_each_entry_safe(entry, entry_tmp, &private->mem_list, list) { list_del(&entry->list); - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } kgsl_mmu_putpagetable(private->pagetable); @@ -641,7 +724,7 @@ static int kgsl_release(struct inode *inodep, struct file *filep) /* clean up any to-be-freed entries that belong to this * process and this device */ - kgsl_memqueue_cleanup(device, private); + kgsl_cancel_events(device, dev_priv); mutex_unlock(&device->mutex); kfree(dev_priv); @@ -698,6 +781,9 @@ static int kgsl_open(struct inode *inodep, struct file *filep) kgsl_check_suspended(device); if (device->open_count == 0) { + kgsl_sharedmem_set(&device->memstore, 0, 0, + device->memstore.size); + result = device->ftbl->start(device, true); if (result) { @@ -758,9 +844,7 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, BUG_ON(private == NULL); list_for_each_entry(entry, &private->mem_list, list) { - if (gpuaddr >= entry->memdesc.gpuaddr && - ((gpuaddr + size) <= - (entry->memdesc.gpuaddr + entry->memdesc.size))) { + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { result = entry; break; } @@ -770,20 +854,6 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, } EXPORT_SYMBOL(kgsl_sharedmem_find_region); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size) -{ - BUG_ON(memdesc->hostptr == NULL); - - if (memdesc->gpuaddr == 0 || (gpuaddr < memdesc->gpuaddr || - gpuaddr >= memdesc->gpuaddr + memdesc->size)) - return NULL; - - *size = memdesc->size - (gpuaddr - memdesc->gpuaddr); - return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); -} -EXPORT_SYMBOL(kgsl_gpuaddr_to_vaddr); - /*call all ioctl sub functions with driver locked*/ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) @@ -810,6 +880,40 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, break; } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to 
contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_find_context(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + /* Clear reset status once its been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + break; + } default: result = dev_priv->device->ftbl->getproperty( dev_priv->device, param->type, @@ -820,21 +924,35 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, return result; } -static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) +static long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) { int result = 0; - struct kgsl_device_waittimestamp *param = data; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; - /* Set the active count so that suspend doesn't do the - wrong thing */ + if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + + return result; +} + +static long _device_waittimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int timeout) +{ + int result = 0; + + /* Set the active count so that suspend doesn't do the wrong thing */ dev_priv->device->active_cnt++; result = dev_priv->device->ftbl->waittimestamp(dev_priv->device, - param->timestamp, - param->timeout); + context, timestamp, timeout); + /* Fire off any pending suspend operations that are in flight */ @@ -844,39 +962,33 @@ static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private return result; } -static bool check_ibdesc(struct kgsl_device_private *dev_priv, - struct kgsl_ibdesc *ibdesc, unsigned int numibs, - bool parse) -{ - bool result = true; - unsigned int i; - for (i = 0; i < numibs; i++) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d\n", ibdesc[i].gpuaddr, - ibdesc[i].sizedwords); - result = false; - break; - } - if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc, - ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) { - KGSL_DRV_ERR(dev_priv->device, - "invalid cmd buffer gpuaddr %08x " \ - "sizedwords %d numibs %d/%d\n", - ibdesc[i].gpuaddr, - ibdesc[i].sizedwords, i+1, numibs); - result = false; - break; - } +static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp *param = data; + + return _device_waittimestamp(dev_priv, KGSL_MEMSTORE_GLOBAL, + param->timestamp, param->timeout); +} + +static long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + 
KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; } - return result; + + return _device_waittimestamp(dev_priv, context, + param->timestamp, param->timeout); } static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, @@ -895,7 +1007,7 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, if (context == NULL) { result = -EINVAL; KGSL_DRV_ERR(dev_priv->device, - "invalid drawctxt drawctxt_id %d\n", + "invalid context_id %d\n", param->drawctxt_id); goto done; } @@ -947,12 +1059,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, param->numibs = 1; } - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc"); - result = -EINVAL; - goto free_ibdesc; - } - result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, ibdesc, @@ -960,17 +1066,6 @@ static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, ¶m->timestamp, param->flags); - if (result != 0) - goto free_ibdesc; - - /* this is a check to try to detect if a command buffer was freed - * during issueibcmds(). - */ - if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) { - KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue"); - result = -EINVAL; - goto free_ibdesc; - } free_ibdesc: kfree(ibdesc); @@ -983,43 +1078,111 @@ done: return result; } +static long _cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, unsigned int type, + unsigned int *timestamp) +{ + *timestamp = dev_priv->device->ftbl->readtimestamp(dev_priv->device, + context, type); + + return 0; +} + static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { struct kgsl_cmdstream_readtimestamp *param = data; - param->timestamp = - dev_priv->device->ftbl->readtimestamp(dev_priv->device, - param->type); + return _cmdstream_readtimestamp(dev_priv, NULL, + param->type, ¶m->timestamp); +} - return 0; +static long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { + KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n", + param->context_id); + return -EINVAL; + } + + return _cmdstream_readtimestamp(dev_priv, context, + param->type, ¶m->timestamp); +} + +static void kgsl_freemem_event_cb(struct kgsl_device *device, + void *priv, u32 id, u32 timestamp) +{ + struct kgsl_mem_entry *entry = priv; + spin_lock(&entry->priv->mem_lock); + list_del(&entry->list); + spin_unlock(&entry->priv->mem_lock); + kgsl_mem_entry_detach_process(entry); +} + +static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, + unsigned int gpuaddr, struct kgsl_context *context, + unsigned int timestamp, unsigned int type) +{ + int result = 0; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + unsigned int cur; + unsigned int context_id = context ? 
context->id : KGSL_MEMSTORE_GLOBAL; + + spin_lock(&dev_priv->process_priv->mem_lock); + entry = kgsl_sharedmem_find(dev_priv->process_priv, gpuaddr); + spin_unlock(&dev_priv->process_priv->mem_lock); + + if (entry) { + cur = device->ftbl->readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED); + + result = kgsl_add_event(dev_priv->device, context_id, + timestamp, kgsl_freemem_event_cb, + entry, dev_priv); + } else { + KGSL_DRV_ERR(dev_priv->device, + "invalid gpuaddr %08x\n", gpuaddr); + result = -EINVAL; + } + + return result; } static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - int result = 0; struct kgsl_cmdstream_freememontimestamp *param = data; - struct kgsl_mem_entry *entry = NULL; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr); - if (entry) - list_del(&entry->list); - spin_unlock(&dev_priv->process_priv->mem_lock); + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + NULL, param->timestamp, param->type); +} - if (entry) { - kgsl_memqueue_freememontimestamp(dev_priv->device, entry, - param->timestamp, param->type); - } else { +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context; + + context = kgsl_find_context(dev_priv, param->context_id); + if (context == NULL) { KGSL_DRV_ERR(dev_priv->device, - "invalid gpuaddr %08x\n", param->gpuaddr); - result = -EINVAL; + "invalid drawctxt context_id %d\n", param->context_id); + return -EINVAL; } - return result; + return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr, + context, param->timestamp, param->type); } static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, @@ -1089,7 +1252,7 @@ static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, spin_unlock(&private->mem_lock); if (entry) { - kgsl_mem_entry_put(entry); + kgsl_mem_entry_detach_process(entry); } else { KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr); result = -EINVAL; @@ -1169,7 +1332,7 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, goto error; } - result = kgsl_sharedmem_vmalloc_user(&entry->memdesc, + result = kgsl_sharedmem_page_alloc_user(&entry->memdesc, private->pagetable, len, param->flags); if (result != 0) @@ -1177,26 +1340,25 @@ kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - result = remap_vmalloc_range(vma, (void *) entry->memdesc.hostptr, 0); + result = kgsl_sharedmem_map_vma(vma, &entry->memdesc); if (result) { - KGSL_CORE_ERR("remap_vmalloc_range failed: %d\n", result); - goto error_free_vmalloc; + KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result); + goto error_free_alloc; } param->gpuaddr = entry->memdesc.gpuaddr; - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); /* Process specific statistics */ - KGSL_STATS_ADD(len, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, len); kgsl_check_idle(dev_priv->device); return 0; -error_free_vmalloc: +error_free_alloc: kgsl_sharedmem_free(&entry->memdesc); error_free_entry: @@ -1292,7 +1454,7 @@ static int kgsl_setup_phys_file(struct kgsl_mem_entry *entry, } - entry->file_ptr = filep; + 
entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = size; @@ -1319,8 +1481,8 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, int sglen = PAGE_ALIGN(size) / PAGE_SIZE; unsigned long paddr = (unsigned long) addr; - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), - GFP_KERNEL); + memdesc->sg = kgsl_sg_alloc(sglen); + if (memdesc->sg == NULL) return -ENOMEM; @@ -1360,7 +1522,7 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, err: spin_unlock(¤t->mm->page_table_lock); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, sglen); memdesc->sg = NULL; return -EINVAL; @@ -1464,7 +1626,7 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, goto err; } - entry->file_ptr = filep; + entry->priv_data = filep; entry->memdesc.pagetable = pagetable; entry->memdesc.size = ALIGN(size, PAGE_SIZE); entry->memdesc.hostptr = hostptr; @@ -1488,6 +1650,51 @@ static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry, } #endif +static int kgsl_setup_ion(struct kgsl_mem_entry *entry, + struct kgsl_pagetable *pagetable, int fd) +{ + struct ion_handle *handle; + struct scatterlist *s; + unsigned long flags; + + if (kgsl_ion_client == NULL) { + kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME); + if (kgsl_ion_client == NULL) + return -ENODEV; + } + + handle = ion_import_fd(kgsl_ion_client, fd); + if (IS_ERR_OR_NULL(handle)) + return PTR_ERR(handle); + + entry->memtype = KGSL_MEM_ENTRY_ION; + entry->priv_data = handle; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + + if (ion_handle_get_flags(kgsl_ion_client, handle, &flags)) + goto err; + + entry->memdesc.sg = ion_map_dma(kgsl_ion_client, handle, flags); + + if (IS_ERR_OR_NULL(entry->memdesc.sg)) + goto err; + + /* Calculate the size of the memdesc from the sglist */ + + entry->memdesc.sglen = 0; + + for (s = entry->memdesc.sg; s != NULL; s = sg_next(s)) { + entry->memdesc.size += s->length; + entry->memdesc.sglen++; + } + + return 0; +err: + ion_free(kgsl_ion_client, handle); + return -ENOMEM; +} + static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { @@ -1515,6 +1722,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_phys_file(entry, private->pagetable, param->fd, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_PMEM; break; case KGSL_USER_MEM_TYPE_ADDR: @@ -1531,6 +1739,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_hostptr(entry, private->pagetable, (void *) param->hostptr, param->offset, param->len); + entry->memtype = KGSL_MEM_ENTRY_USER; break; case KGSL_USER_MEM_TYPE_ASHMEM: @@ -1547,6 +1756,12 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, result = kgsl_setup_ashmem(entry, private->pagetable, param->fd, (void *) param->hostptr, param->len); + + entry->memtype = KGSL_MEM_ENTRY_ASHMEM; + break; + case KGSL_USER_MEM_TYPE_ION: + result = kgsl_setup_ion(entry, private->pagetable, + param->fd); break; default: KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); @@ -1566,14 +1781,10 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, /* Adjust the returned value for a non 4k aligned offset */ param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK); - entry->memtype = KGSL_MAPPED_MEMORY; - KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped, kgsl_driver.stats.mapped_max); - /* Statistics */ - KGSL_STATS_ADD(param->len, 
private->stats.mapped, - private->stats.mapped_max); + kgsl_process_add_stats(private, entry->memtype, param->len); kgsl_mem_entry_attach_process(entry, private); @@ -1581,8 +1792,8 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, return result; error_put_file_ptr: - if (entry->file_ptr) - fput(entry->file_ptr); + if (entry->priv_data) + fput(entry->priv_data); error: kfree(entry); @@ -1608,10 +1819,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, result = -EINVAL; goto done; } - if (!entry->memdesc.hostptr) - entry->memdesc.hostptr = - kgsl_gpuaddr_to_vaddr(&entry->memdesc, - param->gpuaddr, &entry->memdesc.size); if (!entry->memdesc.hostptr) { KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n", @@ -1620,9 +1827,6 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, } kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN); - - /* Statistics - keep track of how many flushes each process does */ - private->stats.flushes++; done: spin_unlock(&private->mem_lock); return result; @@ -1645,12 +1849,11 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, param->size, param->flags); if (result == 0) { - entry->memtype = KGSL_USER_MEMORY; + entry->memtype = KGSL_MEM_ENTRY_KERNEL; kgsl_mem_entry_attach_process(entry, private); param->gpuaddr = entry->memdesc.gpuaddr; - KGSL_STATS_ADD(entry->memdesc.size, private->stats.user, - private->stats.user_max); + kgsl_process_add_stats(private, entry->memtype, param->size); } else kfree(entry); @@ -1698,13 +1901,14 @@ struct kgsl_genlock_event_priv { * kgsl_genlock_event_cb - Event callback for a genlock timestamp event * @device - The KGSL device that expired the timestamp * @priv - private data for the event + * @context_id - the context id that goes with the timestamp * @timestamp - the timestamp that triggered the event * * Release a genlock lock following the expiration of a timestamp */ static void kgsl_genlock_event_cb(struct kgsl_device *device, - void *priv, u32 timestamp) + void *priv, u32 context_id, u32 timestamp) { struct kgsl_genlock_event_priv *ev = priv; int ret; @@ -1724,6 +1928,7 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, * @timestamp - Timestamp to trigger the event * @data - User space buffer containing struct kgsl_genlock_event_priv * @len - length of the userspace buffer + * @owner - driver instance that owns this event * @returns 0 on success or error code on error * * Attack to a genlock handle and register an event to release the @@ -1731,7 +1936,8 @@ static void kgsl_genlock_event_cb(struct kgsl_device *device, */ static int kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { struct kgsl_genlock_event_priv *event; struct kgsl_timestamp_event_genlock priv; @@ -1756,7 +1962,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, return ret; } - ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event); + ret = kgsl_add_event(device, context_id, timestamp, + kgsl_genlock_event_cb, event, owner); if (ret) kfree(event); @@ -1764,7 +1971,8 @@ static int kgsl_add_genlock_event(struct kgsl_device *device, } #else static long kgsl_add_genlock_event(struct kgsl_device *device, - u32 timestamp, void __user *data, int len) + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) { return -EINVAL; } @@ -1787,7 +1995,8 @@ static 
long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, switch (param->type) { case KGSL_TIMESTAMP_EVENT_GENLOCK: ret = kgsl_add_genlock_event(dev_priv->device, - param->timestamp, param->priv, param->len); + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); break; default: ret = -EINVAL; @@ -1811,12 +2020,18 @@ static const struct { kgsl_ioctl_device_getproperty, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP, kgsl_ioctl_device_waittimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, kgsl_ioctl_rb_issueibcmds, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP, kgsl_ioctl_cmdstream_readtimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP, kgsl_ioctl_cmdstream_freememontimestamp, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, kgsl_ioctl_drawctxt_create, 1), KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, @@ -1839,6 +2054,8 @@ static const struct { kgsl_ioctl_cff_user_event, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, kgsl_ioctl_timestamp_event, 1), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty, 1), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -1892,7 +2109,7 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (!func) { KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); - ret = -EINVAL; + ret = -ENOIOCTLCMD; goto done; } lock = 1; @@ -2142,16 +2359,15 @@ kgsl_register_device(struct kgsl_device *device) INIT_WORK(&device->idle_check_ws, kgsl_idle_check); INIT_WORK(&device->ts_expired_ws, kgsl_timestamp_expired); - INIT_LIST_HEAD(&device->memqueue); INIT_LIST_HEAD(&device->events); + device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID; + ret = kgsl_mmu_init(device); if (ret != 0) goto err_dest_work_q; - ret = kgsl_allocate_contiguous(&device->memstore, - sizeof(struct kgsl_devmemstore)); - + ret = kgsl_allocate_contiguous(&device->memstore, KGSL_MEMSTORE_SIZE); if (ret != 0) goto err_close_mmu; diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h old mode 100644 new mode 100755 index e26cdc9e..fc45ff7c --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -21,9 +21,21 @@ #include #include #include +#include #define KGSL_NAME "kgsl" +/* The number of memstore arrays limits the number of contexts allowed. 
+ * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1) + +/* Timestamp window used to detect rollovers */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + /*cache coherency ops */ #define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002 @@ -91,6 +103,8 @@ struct kgsl_driver { struct { unsigned int vmalloc; unsigned int vmalloc_max; + unsigned int page_alloc; + unsigned int page_alloc_max; unsigned int coherent; unsigned int coherent_max; unsigned int mapped; @@ -101,8 +115,41 @@ struct kgsl_driver { extern struct kgsl_driver kgsl_driver; -#define KGSL_USER_MEMORY 1 -#define KGSL_MAPPED_MEMORY 2 +struct kgsl_pagetable; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + int (*vmflags)(struct kgsl_memdesc *); + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel_mem)(struct kgsl_memdesc *); +}; + +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) + +/* shared memory allocation */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int gpuaddr; + unsigned int physaddr; + unsigned int size; + unsigned int priv; + struct scatterlist *sg; + unsigned int sglen; + struct kgsl_memdesc_ops *ops; + int flags; +}; + +/* List of different memory entry types */ + +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_PMEM 1 +#define KGSL_MEM_ENTRY_ASHMEM 2 +#define KGSL_MEM_ENTRY_USER 3 +#define KGSL_MEM_ENTRY_ION 4 +#define KGSL_MEM_ENTRY_MAX 5 struct kgsl_pagetable; struct kgsl_memdesc_ops; @@ -124,9 +171,10 @@ struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; int memtype; - struct file *file_ptr; + void *priv_data; struct list_head list; uint32_t free_timestamp; + unsigned int context_id; /* back pointer to private structure under whose context this * allocation is made */ struct kgsl_process_private *priv; @@ -139,8 +187,10 @@ struct kgsl_mem_entry { #endif void kgsl_mem_entry_destroy(struct kref *kref); -uint8_t *kgsl_gpuaddr_to_vaddr(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr, unsigned int *size); + +struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase, + unsigned int gpuaddr, unsigned int size); + struct kgsl_mem_entry *kgsl_sharedmem_find_region( struct kgsl_process_private *private, unsigned int gpuaddr, size_t size); @@ -169,14 +219,26 @@ static inline void kgsl_drm_exit(void) #endif static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, - unsigned int gpuaddr) + unsigned int gpuaddr, unsigned int size) { - if (gpuaddr >= memdesc->gpuaddr && (gpuaddr + sizeof(unsigned int)) <= - (memdesc->gpuaddr + memdesc->size)) { + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { return 1; } return 0; } +static inline uint8_t *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + unsigned int gpuaddr) +{ + if (memdesc->gpuaddr == 0 || + gpuaddr < memdesc->gpuaddr || + gpuaddr >= (memdesc->gpuaddr + memdesc->size) || + (NULL == memdesc->hostptr && memdesc->ops->map_kernel_mem && + memdesc->ops->map_kernel_mem(memdesc))) + return NULL; + + return memdesc->hostptr + (gpuaddr - memdesc->gpuaddr); +} static inline int timestamp_cmp(unsigned int new, unsigned int old) { @@ -185,7 +247,7 @@ static inline int timestamp_cmp(unsigned int new, unsigned int old) 
if (ts_diff == 0) return 0; - return ((ts_diff > 0) || (ts_diff < -20000)) ? 1 : -1; + return ((ts_diff > 0) || (ts_diff < -KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; } static inline void diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c old mode 100644 new mode 100755 index aa33152c..1ab8908f --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -20,7 +20,7 @@ #include #include #include -#include +//#include #include "kgsl.h" #include "kgsl_cffdump.h" @@ -231,8 +231,6 @@ static void cffdump_printline(int id, uint opcode, uint op1, uint op2, spin_lock(&cffdump_lock); if (opcode == CFF_OP_WRITE_MEM) { - if (op1 < 0x40000000 || op1 >= 0x60000000) - KGSL_CORE_ERR("addr out-of-range: op1=%08x", op1); if ((cff_op_write_membuf.addr != op1 && cff_op_write_membuf.count) || (cff_op_write_membuf.count == MEMBUF_SIZE)) @@ -360,15 +358,7 @@ void kgsl_cffdump_destroy() void kgsl_cffdump_open(enum kgsl_deviceid device_id) { - /*TODO: move this to where we can report correct gmemsize*/ - unsigned int va_base; - - if (cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_msm8930()) - va_base = 0x40000000; - else - va_base = 0x20000000; - - kgsl_cffdump_memory_base(device_id, va_base, + kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE, CONFIG_MSM_KGSL_PAGE_TABLE_SIZE, SZ_256K); } @@ -401,8 +391,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, bool clean_cache) { const void *src; - uint host_size; - uint physaddr; if (!kgsl_cff_dump_enable) return; @@ -422,13 +410,9 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, } memdesc = &entry->memdesc; } - BUG_ON(memdesc->gpuaddr == 0); - BUG_ON(gpuaddr == 0); - physaddr = kgsl_get_realaddr(memdesc) + (gpuaddr - memdesc->gpuaddr); - - src = kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (src == NULL || host_size < sizebytes) { - KGSL_CORE_ERR("did not find mapping for " + src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); + if (memdesc->hostptr == NULL) { + KGSL_CORE_ERR("no kernel mapping for " "gpuaddr: 0x%08x, m->host: 0x%p, phys: 0x%08x\n", gpuaddr, memdesc->hostptr, memdesc->physaddr); return; @@ -444,7 +428,6 @@ void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, KGSL_CACHE_OP_INV); } - BUG_ON(physaddr > 0x66000000 && physaddr < 0x66ffffff); while (sizebytes > 3) { cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src, 0, 0, 0); @@ -462,7 +445,6 @@ void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes) if (!kgsl_cff_dump_enable) return; - BUG_ON(addr > 0x66000000 && addr < 0x66ffffff); while (sizebytes > 3) { /* Use 32bit memory writes as long as there's at least * 4 bytes left */ @@ -515,197 +497,6 @@ int kgsl_cffdump_waitirq(void) } EXPORT_SYMBOL(kgsl_cffdump_waitirq); -#define ADDRESS_STACK_SIZE 256 -#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF) -static unsigned int kgsl_cffdump_addr_count; - -static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv, - uint *hostaddr, bool check_only) -{ - static uint addr_stack[ADDRESS_STACK_SIZE]; - static uint size_stack[ADDRESS_STACK_SIZE]; - - switch (GET_PM4_TYPE3_OPCODE(hostaddr)) { - case CP_INDIRECT_BUFFER_PFD: - case CP_INDIRECT_BUFFER: - { - /* traverse indirect buffers */ - int i; - uint ibaddr = hostaddr[1]; - uint ibsize = hostaddr[2]; - - /* is this address already in encountered? 
*/ - for (i = 0; - i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr; - ++i) - ; - - if (kgsl_cffdump_addr_count == i) { - addr_stack[kgsl_cffdump_addr_count] = ibaddr; - size_stack[kgsl_cffdump_addr_count++] = ibsize; - - if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) { - KGSL_CORE_ERR("stack overflow\n"); - return false; - } - - return kgsl_cffdump_parse_ibs(dev_priv, NULL, - ibaddr, ibsize, check_only); - } else if (size_stack[i] != ibsize) { - KGSL_CORE_ERR("gpuaddr: 0x%08x, " - "wc: %u, with size wc: %u already on the " - "stack\n", ibaddr, ibsize, size_stack[i]); - return false; - } - } - break; - } - - return true; -} - -/* - * Traverse IBs and dump them to test vector. Detect swap by inspecting - * register writes, keeping note of the current state, and dump - * framebuffer config to test vector - */ -bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, - const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords, - bool check_only) -{ - static uint level; /* recursion level */ - bool ret = true; - uint host_size; - uint *hostaddr, *hoststart; - int dwords_left = sizedwords; /* dwords left in the current command - buffer */ - - if (level == 0) - kgsl_cffdump_addr_count = 0; - - if (memdesc == NULL) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - gpuaddr, sizedwords * sizeof(uint)); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_CORE_ERR("did not find mapping " - "for gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - memdesc = &entry->memdesc; - } - - hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr, &host_size); - if (hostaddr == NULL) { - KGSL_CORE_ERR("did not find mapping for " - "gpuaddr: 0x%08x\n", gpuaddr); - return true; - } - - hoststart = hostaddr; - - level++; - - if (!memdesc->physaddr) { - KGSL_CORE_ERR("no physaddr"); - } else { - mb(); - kgsl_cache_range_op((struct kgsl_memdesc *)memdesc, - KGSL_CACHE_OP_INV); - } - -#ifdef DEBUG - pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n", - gpuaddr, sizedwords, hostaddr); -#endif - - while (dwords_left > 0) { - int count = 0; /* dword count including packet header */ - bool cur_ret = true; - - switch (*hostaddr >> 30) { - case 0x0: /* type-0 */ - count = (*hostaddr >> 16)+2; - break; - case 0x1: /* type-1 */ - count = 2; - break; - case 0x3: /* type-3 */ - count = ((*hostaddr >> 16) & 0x3fff) + 2; - cur_ret = kgsl_cffdump_handle_type3(dev_priv, - hostaddr, check_only); - break; - default: - pr_warn("kgsl: cffdump: parse-ib: unexpected type: " - "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n", - *hostaddr >> 30, *hostaddr, hostaddr, - gpuaddr+4*(sizedwords-dwords_left)); - cur_ret = false; - count = dwords_left; - break; - } - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x" - " @ 0x%p[gb:0x%08x], level:%d\n", - sizedwords-dwords_left, sizedwords, *hostaddr, - hostaddr, gpuaddr+4*(sizedwords-dwords_left), - level); - - print_hex_dump(KERN_ERR, level == 1 ? 
"IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - ret = ret && cur_ret; - - /* jump to next packet */ - dwords_left -= count; - hostaddr += count; - cur_ret = dwords_left >= 0; - -#ifdef DEBUG - if (!cur_ret) { - pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, " - "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n", - count, sizedwords-(dwords_left+count), - sizedwords, *(hostaddr-count), hostaddr-count, - gpuaddr+4*(sizedwords-(dwords_left+count)), - level); - - print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:", - DUMP_PREFIX_OFFSET, 32, 4, hoststart, - sizedwords*4, 0); - } -#endif - - ret = ret && cur_ret; - } - - if (!ret) - pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, " - "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords); - - if (!check_only) { -#ifdef DEBUG - uint offset = gpuaddr - memdesc->gpuaddr; - pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, " - "physaddr:%08x, offset:%d, size:%d", hoststart, - gpuaddr, memdesc->physaddr + offset, offset, - sizedwords*4); -#endif - kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4, - false); - } - - level--; - - return ret; -} - static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, uint prev_padding) { diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h old mode 100644 new mode 100755 index 64d369eb..45e41d91 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -47,6 +46,7 @@ #define KGSL_STATE_SUSPEND 0x00000010 #define KGSL_STATE_HUNG 0x00000020 #define KGSL_STATE_DUMP_AND_RECOVER 0x00000040 +#define KGSL_STATE_SLUMBER 0x00000080 #define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000 @@ -76,9 +76,10 @@ struct kgsl_functable { enum kgsl_property_type type, void *value, unsigned int sizebytes); int (*waittimestamp) (struct kgsl_device *device, - unsigned int timestamp, unsigned int msecs); + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); unsigned int (*readtimestamp) (struct kgsl_device *device, - enum kgsl_timestamp_type type); + struct kgsl_context *context, enum kgsl_timestamp_type type); int (*issueibcmds) (struct kgsl_device_private *dev_priv, struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, unsigned int sizedwords, uint32_t *timestamp, @@ -101,6 +102,9 @@ struct kgsl_functable { struct kgsl_context *context); long (*ioctl) (struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); + int (*setproperty) (struct kgsl_device *device, + enum kgsl_property_type type, void *value, + unsigned int sizebytes); }; struct kgsl_memregion { @@ -120,10 +124,12 @@ struct kgsl_mh { }; struct kgsl_event { + struct kgsl_context *context; uint32_t timestamp; - void (*func)(struct kgsl_device *, void *, u32); + void (*func)(struct kgsl_device *, void *, u32, u32); void *priv; struct list_head list; + struct kgsl_device_private *owner; }; @@ -152,7 +158,7 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; - struct list_head memqueue; + unsigned int last_expired_ctxt_id; unsigned int active_cnt; struct completion suspend_gate; @@ -185,6 +191,11 @@ struct kgsl_context { /* Pointer to 
the device specific context information */ void *devctxt; + /* + * Status indicating whether a gpu reset occurred and whether this + * context was responsible for causing it + */ + unsigned int reset_status; }; struct kgsl_process_private { @@ -194,15 +205,12 @@ struct kgsl_process_private { struct list_head mem_list; struct kgsl_pagetable *pagetable; struct list_head list; - struct kobject *kobj; + struct kobject kobj; struct { - unsigned int user; - unsigned int user_max; - unsigned int mapped; - unsigned int mapped_max; - unsigned int flushes; - } stats; + unsigned int cur; + unsigned int max; + } stats[KGSL_MEM_ENTRY_MAX]; }; struct kgsl_device_private { @@ -217,6 +225,14 @@ struct kgsl_power_stats { struct kgsl_device *kgsl_get_device(int dev_idx); +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, size_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + static inline void kgsl_regread(struct kgsl_device *device, unsigned int offsetwords, unsigned int *value) @@ -294,7 +310,8 @@ kgsl_find_context(struct kgsl_device_private *dev_priv, uint32_t id) return (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL; } -int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp); +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); int kgsl_register_ts_notifier(struct kgsl_device *device, struct notifier_block *nb); diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c old mode 100644 new mode 100755 index cdf9dc4e..66ac08f6 --- a/drivers/gpu/msm/kgsl_drm.c +++ b/drivers/gpu/msm/kgsl_drm.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009-2011, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -17,7 +17,6 @@ #include "drmP.h" #include "drm.h" #include -#include #include "kgsl.h" #include "kgsl_device.h" @@ -39,6 +38,9 @@ #define ENTRY_EMPTY -1 #define ENTRY_NEEDS_CLEANUP -2 +#define DRM_KGSL_NOT_INITED -1 +#define DRM_KGSL_INITED 1 + #define DRM_KGSL_NUM_FENCE_ENTRIES (DRM_KGSL_HANDLE_WAIT_ENTRIES << 2) #define DRM_KGSL_HANDLE_WAIT_ENTRIES 5 @@ -127,6 +129,8 @@ struct drm_kgsl_gem_object { struct list_head wait_list; }; +static int kgsl_drm_inited = DRM_KGSL_NOT_INITED; + /* This is a global list of all the memory currently mapped in the MMU */ static struct list_head kgsl_mem_list; @@ -152,22 +156,6 @@ static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op) kgsl_cache_range_op(memdesc, cacheop); } -/* Flush all the memory mapped in the MMU */ - -void kgsl_gpu_mem_flush(int op) -{ - struct drm_kgsl_gem_object *entry; - - list_for_each_entry(entry, &kgsl_mem_list, list) { - kgsl_gem_mem_flush(&entry->memdesc, entry->type, op); - } - - /* Takes care of WT/WC case. 
- * More useful when we go barrierless - */ - dmb(); -} - /* TODO: * Add vsync wait */ @@ -186,41 +174,6 @@ struct kgsl_drm_device_priv { struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX]; }; -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param); - -static struct notifier_block kgsl_ts_nb[KGSL_DEVICE_MAX]; - -static int kgsl_drm_firstopen(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - - if (device == NULL) - continue; - - kgsl_ts_nb[i].notifier_call = kgsl_ts_notifier_cb; - kgsl_register_ts_notifier(device, &kgsl_ts_nb[i]); - } - - return 0; -} - -void kgsl_drm_lastclose(struct drm_device *dev) -{ - int i; - - for (i = 0; i < KGSL_DEVICE_MAX; i++) { - struct kgsl_device *device = kgsl_get_device(i); - if (device == NULL) - continue; - - kgsl_unregister_ts_notifier(device, &kgsl_ts_nb[i]); - } -} - void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv) { } @@ -268,80 +221,71 @@ kgsl_gem_alloc_memory(struct drm_gem_object *obj) { struct drm_kgsl_gem_object *priv = obj->driver_private; int index; + int result = 0; /* Return if the memory is already allocated */ if (kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type)) return 0; + if (priv->pagetable == NULL) { + priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); + + if (priv->pagetable == NULL) { + DRM_ERROR("Unable to get the GPU MMU pagetable\n"); + return -EINVAL; + } + } + if (TYPE_IS_PMEM(priv->type)) { int type; if (priv->type == DRM_KGSL_GEM_TYPE_EBI || - priv->type & DRM_KGSL_GEM_PMEM_EBI) - type = PMEM_MEMTYPE_EBI1; - else - type = PMEM_MEMTYPE_SMI; - - priv->memdesc.physaddr = - pmem_kalloc(obj->size * priv->bufcount, - type | PMEM_ALIGNMENT_4K); - - if (IS_ERR((void *) priv->memdesc.physaddr)) { - DRM_ERROR("Unable to allocate PMEM memory\n"); - return -ENOMEM; + priv->type & DRM_KGSL_GEM_PMEM_EBI) { + type = PMEM_MEMTYPE_EBI1; + result = kgsl_sharedmem_ebimem_user( + &priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, + 0); + if (result) { + DRM_ERROR( + "Unable to allocate PMEM memory\n"); + return result; + } } - - priv->memdesc.size = obj->size * priv->bufcount; + else + return -EINVAL; } else if (TYPE_IS_MEM(priv->type)) { - priv->memdesc.hostptr = - vmalloc_user(obj->size * priv->bufcount); - if (priv->memdesc.hostptr == NULL) { - DRM_ERROR("Unable to allocate vmalloc memory\n"); - return -ENOMEM; + if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || + priv->type & DRM_KGSL_GEM_CACHE_MASK) + list_add(&priv->list, &kgsl_mem_list); + + result = kgsl_sharedmem_page_alloc_user(&priv->memdesc, + priv->pagetable, + obj->size * priv->bufcount, 0); + + if (result != 0) { + DRM_ERROR( + "Unable to allocate Vmalloc user memory\n"); + return result; } - - priv->memdesc.size = obj->size * priv->bufcount; - priv->memdesc.ops = &kgsl_vmalloc_ops; } else return -EINVAL; - for (index = 0; index < priv->bufcount; index++) + for (index = 0; index < priv->bufcount; index++) { priv->bufs[index].offset = index * obj->size; - + priv->bufs[index].gpuaddr = + priv->memdesc.gpuaddr + + priv->bufs[index].offset; + } + priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - - if (!priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return; - - kgsl_mmu_unmap(priv->pagetable, &priv->memdesc); - - kgsl_mmu_putpagetable(priv->pagetable); - priv->pagetable 
= NULL; - - if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || - (priv->type & DRM_KGSL_GEM_CACHE_MASK)) - list_del(&priv->list); - - priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; -} -#else -static void -kgsl_gem_unmap(struct drm_gem_object *obj) -{ -} -#endif - static void kgsl_gem_free_memory(struct drm_gem_object *obj) { @@ -353,12 +297,17 @@ kgsl_gem_free_memory(struct drm_gem_object *obj) kgsl_gem_mem_flush(&priv->memdesc, priv->type, DRM_KGSL_GEM_CACHE_OP_FROM_DEV); - kgsl_gem_unmap(obj); - - if (TYPE_IS_PMEM(priv->type)) - pmem_kfree(priv->memdesc.physaddr); - kgsl_sharedmem_free(&priv->memdesc); + + kgsl_mmu_putpagetable(priv->pagetable); + priv->pagetable = NULL; + + if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) || + (priv->type & DRM_KGSL_GEM_CACHE_MASK)) + list_del(&priv->list); + + priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED; + } int @@ -454,7 +403,7 @@ kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, filp = fget(drm_fd); if (unlikely(filp == NULL)) { - DRM_ERROR("Unable to ghet the DRM file descriptor\n"); + DRM_ERROR("Unable to get the DRM file descriptor\n"); return -EINVAL; } file_priv = filp->private_data; @@ -527,7 +476,7 @@ kgsl_gem_init_obj(struct drm_device *dev, ret = drm_gem_handle_create(file_priv, obj, handle); - drm_gem_object_handle_unreference(obj); + drm_gem_object_unreference(obj); INIT_LIST_HEAD(&priv->wait_list); for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) { @@ -702,128 +651,14 @@ int kgsl_gem_unbind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (--priv->bound == 0) - kgsl_gem_unmap(obj); - - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); return 0; } -#ifdef CONFIG_MSM_KGSL_MMU -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - int ret = -EINVAL; - - if (priv->flags & DRM_KGSL_GEM_FLAG_MAPPED) - return 0; - - /* Get the global page table */ - - if (priv->pagetable == NULL) { - priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT); - - if (priv->pagetable == NULL) { - DRM_ERROR("Unable to get the GPU MMU pagetable\n"); - return -EINVAL; - } - } - - priv->memdesc.pagetable = priv->pagetable; - - ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - - if (!ret) { - for (index = 0; index < priv->bufcount; index++) { - priv->bufs[index].gpuaddr = - priv->memdesc.gpuaddr + - priv->bufs[index].offset; - } - } - - /* Add cached memory to the list to be cached */ - - if (priv->type == DRM_KGSL_GEM_TYPE_KMEM || - priv->type & DRM_KGSL_GEM_CACHE_MASK) - list_add(&priv->list, &kgsl_mem_list); - - priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED; - - return ret; -} -#else -static int -kgsl_gem_map(struct drm_gem_object *obj) -{ - struct drm_kgsl_gem_object *priv = obj->driver_private; - int index; - - if (TYPE_IS_PMEM(priv->type)) { - for (index = 0; index < priv->bufcount; index++) - priv->bufs[index].gpuaddr = - priv->memdesc.physaddr + priv->bufs[index].offset; - - return 0; - } - - return -EINVAL; -} -#endif - int kgsl_gem_bind_gpu_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_kgsl_gem_bind_gpu *args = data; - struct 
drm_gem_object *obj; - struct drm_kgsl_gem_object *priv; - int ret = 0; - - obj = drm_gem_object_lookup(dev, file_priv, args->handle); - - if (obj == NULL) { - DRM_ERROR("Invalid GEM handle %x\n", args->handle); - return -EBADF; - } - - mutex_lock(&dev->struct_mutex); - priv = obj->driver_private; - - if (priv->bound++ == 0) { - - if (!kgsl_gem_memory_allocated(obj)) { - DRM_ERROR("Memory not allocated for this object\n"); - ret = -ENOMEM; - goto out; - } - - ret = kgsl_gem_map(obj); - - /* This is legacy behavior - use GET_BUFFERINFO instead */ - args->gpuptr = priv->bufs[0].gpuaddr; - } -out: - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; + return 0; } /* Allocate the memory and prepare it for CPU mapping */ @@ -1068,17 +903,18 @@ int kgsl_gem_kmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_device *dev = obj->dev; struct drm_kgsl_gem_object *priv; - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; mutex_lock(&dev->struct_mutex); priv = obj->driver_private; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) priv->memdesc.hostptr + offset; + i = offset >> PAGE_SHIFT; + page = sg_page(&(priv->memdesc.sg[i])); - page = vmalloc_to_page((void *) pg); if (!page) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; @@ -1370,27 +1206,6 @@ wakeup_fence_entries(struct drm_kgsl_gem_object_fence *fence) fence->fence_id = ENTRY_NEEDS_CLEANUP; /* Mark it as needing cleanup */ } -static int kgsl_ts_notifier_cb(struct notifier_block *blk, - unsigned long code, void *_param) -{ - struct drm_kgsl_gem_object_fence *fence; - struct kgsl_device *device = kgsl_get_device(code); - int i; - - /* loop through the fences to see what things can be processed */ - - for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) { - fence = &gem_buf_fence[i]; - if (!fence->ts_valid || fence->ts_device != code) - continue; - - if (kgsl_check_timestamp(device, fence->timestamp)) - wakeup_fence_entries(fence); - } - - return 0; -} - int kgsl_gem_lock_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -1583,7 +1398,7 @@ kgsl_gem_unlock_on_ts_ioctl(struct drm_device *dev, void *data, } device = kgsl_get_device(ts_device); - ts_done = kgsl_check_timestamp(device, args->timestamp); + ts_done = kgsl_check_timestamp(device, NULL, args->timestamp); mutex_lock(&dev->struct_mutex); @@ -1634,11 +1449,9 @@ struct drm_ioctl_desc kgsl_drm_ioctls[] = { }; static struct drm_driver driver = { - .driver_features = DRIVER_USE_PLATFORM_DEVICE | DRIVER_GEM, + .driver_features = DRIVER_GEM, .load = kgsl_drm_load, .unload = kgsl_drm_unload, - .firstopen = kgsl_drm_firstopen, - .lastclose = kgsl_drm_lastclose, .preclose = kgsl_drm_preclose, .suspend = kgsl_drm_suspend, .resume = kgsl_drm_resume, @@ -1669,8 +1482,13 @@ int kgsl_drm_init(struct platform_device *dev) { int i; + /* Only initialize once */ + if (kgsl_drm_inited == DRM_KGSL_INITED) + return 0; + + kgsl_drm_inited = DRM_KGSL_INITED; + driver.num_ioctls = DRM_ARRAY_SIZE(kgsl_drm_ioctls); - driver.platform_device = dev; INIT_LIST_HEAD(&kgsl_mem_list); @@ -1680,10 +1498,11 @@ int kgsl_drm_init(struct platform_device *dev) gem_buf_fence[i].fence_id = ENTRY_EMPTY; } - return drm_init(&driver); + return drm_platform_init(&driver, dev); } void kgsl_drm_exit(void) { - drm_exit(&driver); + kgsl_drm_inited = DRM_KGSL_NOT_INITED; + drm_platform_exit(&driver, driver.kdriver.platform_device); } diff 
--git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c index 9e7ef61d..5d326658 100644 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -356,8 +356,8 @@ err_ptpool_remove: int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct kgsl_gpummu_pt *gpummu_pt = pt->priv; - return pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); + struct kgsl_gpummu_pt *gpummu_pt = pt ? pt->priv : NULL; + return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base); } void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt) @@ -385,14 +385,16 @@ kgsl_pt_map_set(struct kgsl_gpummu_pt *pt, uint32_t pte, uint32_t val) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - writel_relaxed(val, &baseptr[pte]); + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + baseptr[pte] = val; } static inline uint32_t kgsl_pt_map_get(struct kgsl_gpummu_pt *pt, uint32_t pte) { uint32_t *baseptr = (uint32_t *)pt->base.hostptr; - return readl_relaxed(&baseptr[pte]) & GSL_PT_PAGE_ADDR_MASK; + BUG_ON(pte*sizeof(uint32_t) >= pt->base.size); + return baseptr[pte] & GSL_PT_PAGE_ADDR_MASK; } static unsigned int kgsl_gpummu_pt_get_flags(struct kgsl_pagetable *pt, @@ -683,7 +685,7 @@ kgsl_gpummu_map(void *mmu_specific_pt, flushtlb = 1; for_each_sg(memdesc->sg, s, memdesc->sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); unsigned int j; /* Each sg entry might be multiple pages long */ diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index 30365a3c..760cdb03 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -34,8 +34,8 @@ struct kgsl_iommu { static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt, unsigned int pt_base) { - struct iommu_domain *domain = pt->priv; - return pt && pt_base && ((unsigned int)domain == pt_base); + struct iommu_domain *domain = pt ? pt->priv : NULL; + return domain && pt_base && ((unsigned int)domain == pt_base); } static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt) diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c old mode 100644 new mode 100755 index 7916ecb0..d7585ef9 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -23,6 +22,7 @@ #include "kgsl_mmu.h" #include "kgsl_device.h" #include "kgsl_sharedmem.h" +#include "adreno_postmortem.h" #define KGSL_MMU_ALIGN_SHIFT 13 #define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1)) @@ -592,6 +592,12 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK, memdesc->size); + /* + * Don't clear the gpuaddr on global mappings because they + * may be in use by other pagetables + */ + if (!(memdesc->priv & KGSL_MEMFLAGS_GLOBAL)) + memdesc->gpuaddr = 0; return 0; } EXPORT_SYMBOL(kgsl_mmu_unmap); @@ -623,6 +629,7 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, gpuaddr, memdesc->gpuaddr); goto error_unmap; } + memdesc->priv |= KGSL_MEMFLAGS_GLOBAL; return result; error_unmap: kgsl_mmu_unmap(pagetable, memdesc); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c old mode 100644 new mode 100755 index a587c44a..b52fc1d5 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved. - * Copyright (C) 2011 Sony Ericsson Mobile Communications AB. +/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -11,9 +10,14 @@ * GNU General Public License for more details. * */ + +#include #include #include #include +#include +#include +#include #include "kgsl.h" #include "kgsl_sharedmem.h" @@ -21,6 +25,59 @@ #include "kgsl_device.h" #include "adreno_ringbuffer.h" +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. 
This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; + +/** + * Given a kobj, find the process structure attached to it + */ + static struct kgsl_process_private * _get_priv_from_kobj(struct kobject *kobj) { @@ -41,87 +98,109 @@ _get_priv_from_kobj(struct kobject *kobj) return NULL; } -/* sharedmem / memory sysfs files */ +/** + * Show the current amount of memory allocated for the given memtype + */ static ssize_t -process_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) { + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].max); +} + + +static void mem_entry_sysfs_release(struct kobject *kobj) +{ +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); struct kgsl_process_private *priv; - unsigned int val = 0; + ssize_t ret; mutex_lock(&kgsl_driver.process_mutex); priv = _get_priv_from_kobj(kobj); - if (priv == NULL) { - mutex_unlock(&kgsl_driver.process_mutex); - return 0; - } - - if (!strncmp(attr->attr.name, "user", 4)) - val = priv->stats.user; - if (!strncmp(attr->attr.name, "user_max", 8)) - val = priv->stats.user_max; - if (!strncmp(attr->attr.name, "mapped", 6)) - val = priv->stats.mapped; - if (!strncmp(attr->attr.name, "mapped_max", 10)) - val = priv->stats.mapped_max; - if (!strncmp(attr->attr.name, "flushes", 7)) - val = priv->stats.flushes; + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; mutex_unlock(&kgsl_driver.process_mutex); - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return ret; } -#define KGSL_MEMSTAT_ATTR(_name, _show) \ - static struct kobj_attribute attr_##_name = \ - __ATTR(_name, 0444, _show, NULL) - -KGSL_MEMSTAT_ATTR(user, process_show); -KGSL_MEMSTAT_ATTR(user_max, process_show); -KGSL_MEMSTAT_ATTR(mapped, process_show); -KGSL_MEMSTAT_ATTR(mapped_max, process_show); -KGSL_MEMSTAT_ATTR(flushes, process_show); - -static struct attribute *process_attrs[] = { - &attr_user.attr, - &attr_user_max.attr, - &attr_mapped.attr, - &attr_mapped_max.attr, - &attr_flushes.attr, - NULL +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, }; -static struct attribute_group process_attr_group = { - .attrs = process_attrs, +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, + .default_attrs = NULL, + .release = mem_entry_sysfs_release +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), +#ifdef CONFIG_ANDROID_PMEM + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem), +#endif +#ifdef CONFIG_ASHMEM 
+ MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem), +#endif + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), +#endif }; void kgsl_process_uninit_sysfs(struct kgsl_process_private *private) { - /* Remove the sysfs entry */ - if (private->kobj) { - sysfs_remove_group(private->kobj, &process_attr_group); - kobject_put(private->kobj); + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); } + + kobject_put(&private->kobj); } void kgsl_process_init_sysfs(struct kgsl_process_private *private) { unsigned char name[16]; + int i, ret; - /* Add a entry to the sysfs device */ snprintf(name, sizeof(name), "%d", private->pid); - private->kobj = kobject_create_and_add(name, kgsl_driver.prockobj); - /* sysfs failure isn't fatal, just annoying */ - if (private->kobj != NULL) { - if (sysfs_create_group(private->kobj, &process_attr_group)) { - kobject_put(private->kobj); - private->kobj = NULL; - } + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) + return; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + /* We need to check the value of sysfs_create_file, but we + * don't really care if it passed or not */ + + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr); + ret = sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr); } } @@ -135,6 +214,10 @@ static int kgsl_drv_memstat_show(struct device *dev, val = kgsl_driver.stats.vmalloc; else if (!strncmp(attr->attr.name, "vmalloc_max", 11)) val = kgsl_driver.stats.vmalloc_max; + else if (!strncmp(attr->attr.name, "page_alloc", 10)) + val = kgsl_driver.stats.page_alloc; + else if (!strncmp(attr->attr.name, "page_alloc_max", 14)) + val = kgsl_driver.stats.page_alloc_max; else if (!strncmp(attr->attr.name, "coherent", 8)) val = kgsl_driver.stats.coherent; else if (!strncmp(attr->attr.name, "coherent_max", 12)) @@ -164,6 +247,8 @@ static int kgsl_drv_histogram_show(struct device *dev, DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); @@ -173,6 +258,8 @@ DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); static struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, &dev_attr_coherent, &dev_attr_coherent_max, &dev_attr_mapped, @@ -216,7 +303,7 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) int i; for_each_sg(sg, s, sglen, i) { - unsigned int paddr = sg_phys(s); + unsigned int paddr = kgsl_get_sg_pa(s); _outer_cache_range_op(op, paddr, s->length); } } @@ -227,17 +314,18 @@ static void outer_cache_range_op_sg(struct scatterlist *sg, int sglen, int op) } #endif -static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - unsigned long offset, pg; + unsigned long offset; struct page *page; + int i; offset = (unsigned long) vmf->virtual_address - vma->vm_start; - pg = (unsigned long) memdesc->hostptr + 
offset; - page = vmalloc_to_page((void *) pg); + i = offset >> PAGE_SHIFT; + page = sg_page(&memdesc->sg[i]); if (page == NULL) return VM_FAULT_SIGBUS; @@ -247,15 +335,30 @@ static int kgsl_vmalloc_vmfault(struct kgsl_memdesc *memdesc, return 0; } -static int kgsl_vmalloc_vmflags(struct kgsl_memdesc *memdesc) +static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc) { return VM_RESERVED | VM_DONTEXPAND; } -static void kgsl_vmalloc_free(struct kgsl_memdesc *memdesc) +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) { + int i = 0; + struct scatterlist *sg; + int sglen = memdesc->sglen; + + /* Don't free the guard page if it was used */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + kgsl_driver.stats.page_alloc -= memdesc->size; + + if (memdesc->hostptr) { + vunmap(memdesc->hostptr); kgsl_driver.stats.vmalloc -= memdesc->size; - vfree(memdesc->hostptr); + } + if (memdesc->sg) + for_each_sg(memdesc->sg, sg, sglen, i) + __free_page(sg_page(sg)); } static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) @@ -263,6 +366,48 @@ static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc) return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; } +/* + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int sglen = memdesc->sglen; + int i; + + /* Don't map the guard page if it exists */ + if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE) + sglen--; + + /* create a list of pages to call vmap */ + pages = vmalloc(sglen * sizeof(struct page *)); + if (!pages) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct page *)); + return -ENOMEM; + } + for_each_sg(memdesc->sg, sg, sglen, i) + pages[i] = sg_page(sg); + memdesc->hostptr = vmap(pages, sglen, + VM_IOREMAP, page_prot); + KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc, + kgsl_driver.stats.vmalloc_max); + vfree(pages); + } + if (!memdesc->hostptr) + return -ENOMEM; + + return 0; +} + static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -302,12 +447,13 @@ static void kgsl_coherent_free(struct kgsl_memdesc *memdesc) } /* Global - also used by kgsl_drm.c */ -struct kgsl_memdesc_ops kgsl_vmalloc_ops = { - .free = kgsl_vmalloc_free, - .vmflags = kgsl_vmalloc_vmflags, - .vmfault = kgsl_vmalloc_vmfault, +struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = kgsl_page_alloc_vmflags, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel_mem = kgsl_page_alloc_map_kernel, }; -EXPORT_SYMBOL(kgsl_vmalloc_ops); +EXPORT_SYMBOL(kgsl_page_alloc_ops); static struct kgsl_memdesc_ops kgsl_ebimem_ops = { .free = kgsl_ebimem_free, @@ -341,47 +487,145 @@ void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op) EXPORT_SYMBOL(kgsl_cache_range_op); static int -_kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, - void *ptr, size_t size, unsigned int protflags) + size_t size, unsigned int protflags) { - int order, ret = 0; + int i, order, ret = 0; int sglen = PAGE_ALIGN(size) / PAGE_SIZE; - int i; + struct page **pages = NULL; + pgprot_t 
page_prot = pgprot_writecombine(PAGE_KERNEL); + void *ptr; + + /* + * Add guard page to the end of the allocation when the + * IOMMU is in use. + */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + sglen++; memdesc->size = size; memdesc->pagetable = pagetable; memdesc->priv = KGSL_MEMFLAGS_CACHED; - memdesc->ops = &kgsl_vmalloc_ops; - memdesc->hostptr = (void *) ptr; + memdesc->ops = &kgsl_page_alloc_ops; + + memdesc->sg = kgsl_sg_alloc(sglen); - memdesc->sg = kmalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); if (memdesc->sg == NULL) { + KGSL_CORE_ERR("vmalloc(%d) failed\n", + sglen * sizeof(struct scatterlist)); ret = -ENOMEM; goto done; } + /* + * Allocate space to store the list of pages to send to vmap. + * This is an array of pointers so we can track 1024 pages per page of + * allocation which means we can handle up to an 8MB buffer request with + * two pages; well within the acceptable limits for using kmalloc. + */ + + pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL); + + if (pages == NULL) { + KGSL_CORE_ERR("kmalloc (%d) failed\n", + sglen * sizeof(struct page *)); + ret = -ENOMEM; + goto done; + } + + kmemleak_not_leak(memdesc->sg); + memdesc->sglen = sglen; sg_init_table(memdesc->sg, sglen); - for (i = 0; i < memdesc->sglen; i++, ptr += PAGE_SIZE) { - struct page *page = vmalloc_to_page(ptr); - if (!page) { - ret = -EINVAL; + for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) { + + /* + * Don't use GFP_ZERO here because it is faster to memset the + * range ourselves (see below) + */ + + pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (pages[i] == NULL) { + ret = -ENOMEM; + memdesc->sglen = i; goto done; } - sg_set_page(&memdesc->sg[i], page, PAGE_SIZE, 0); + + sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0); } - kgsl_cache_range_op(memdesc, KGSL_CACHE_OP_INV); + /* Add the guard page to the end of the sglist */ + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { + /* + * It doesn't matter if we use GFP_ZERO here, this never + * gets mapped, and we only allocate it once in the life + * of the system + */ + + if (kgsl_guard_page == NULL) + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + + if (kgsl_guard_page != NULL) { + sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page, + PAGE_SIZE, 0); + memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE; + } else + memdesc->sglen--; + } + + /* + * All memory that goes to the user has to be zeroed out before it gets + * exposed to userspace. This means that the memory has to be mapped in + * the kernel, zeroed (memset) and then unmapped. This also means that + * the dcache has to be flushed to ensure coherency between the kernel + * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped + * zeroed and unmapped each individual page, and then we had to turn + * around and call flush_dcache_page() on that page to clear the caches. + * This was killing us for performance. Instead, we found it is much + * faster to allocate the pages without GFP_ZERO, map the entire range, + * memset it, flush the range and then unmap - this results in a factor + * of 4 improvement for speed for large buffers. There is a small + * increase in speed for small buffers, but only on the order of a few + * microseconds at best. The only downside is that there needs to be + * enough temporary space in vmalloc to accommodate the map.
This + * shouldn't be a problem, but if it happens, fall back to a much slower + * path + */ + + ptr = vmap(pages, i, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, memdesc->size); + dmac_flush_range(ptr, ptr + memdesc->size); + vunmap(ptr); + } else { + int j; + + /* Very, very, very slow path */ + + for (j = 0; j < i; j++) { + ptr = kmap_atomic(pages[j],KM_BOUNCE_READ); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr,KM_BOUNCE_READ); + } + } + + outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, + KGSL_CACHE_OP_FLUSH); ret = kgsl_mmu_map(pagetable, memdesc, protflags); if (ret) goto done; - KGSL_STATS_ADD(size, kgsl_driver.stats.vmalloc, - kgsl_driver.stats.vmalloc_max); + KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc, + kgsl_driver.stats.page_alloc_max); order = get_order(size); @@ -389,6 +633,8 @@ _kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, kgsl_driver.stats.histogram[order]++; done: + kfree(pages); + if (ret) kgsl_sharedmem_free(memdesc); @@ -396,51 +642,41 @@ done: } int -kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size) { - void *ptr; - + int ret = 0; BUG_ON(size == 0); size = ALIGN(size, PAGE_SIZE * 2); - ptr = vmalloc(size); - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc(%d) failed\n", size); - return -ENOMEM; - } - - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + if (!ret) + ret = kgsl_page_alloc_map_kernel(memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + return ret; } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc); int -kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags) { - void *ptr; unsigned int protflags; BUG_ON(size == 0); - ptr = vmalloc_user(size); - - if (ptr == NULL) { - KGSL_CORE_ERR("vmalloc_user(%d) failed: allocated=%d\n", - size, kgsl_driver.stats.vmalloc); - return -ENOMEM; - } protflags = GSL_PT_PAGE_RV; if (!(flags & KGSL_MEMFLAGS_GPUREADONLY)) protflags |= GSL_PT_PAGE_WV; - return _kgsl_sharedmem_vmalloc(memdesc, pagetable, ptr, size, + return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size, protflags); } -EXPORT_SYMBOL(kgsl_sharedmem_vmalloc_user); +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size) @@ -488,7 +724,7 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) if (memdesc->ops && memdesc->ops->free) memdesc->ops->free(memdesc); - kfree(memdesc->sg); + kgsl_sg_free(memdesc->sg, memdesc->sglen); memset(memdesc, 0, sizeof(*memdesc)); } @@ -570,13 +806,17 @@ kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, uint32_t *dst, unsigned int offsetbytes) { + uint32_t *src; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); - WARN_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - if (offsetbytes + sizeof(unsigned int) > memdesc->size) + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) return -ERANGE; - - *dst = readl_relaxed(memdesc->hostptr + offsetbytes); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; return 
0; } EXPORT_SYMBOL(kgsl_sharedmem_readl); @@ -586,12 +826,19 @@ kgsl_sharedmem_writel(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, uint32_t src) { + uint32_t *dst; BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); - BUG_ON(offsetbytes + sizeof(unsigned int) > memdesc->size); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, - src, sizeof(uint)); - writel_relaxed(src, memdesc->hostptr + offsetbytes); + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, + src, sizeof(uint32_t)); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; return 0; } EXPORT_SYMBOL(kgsl_sharedmem_writel); @@ -603,9 +850,39 @@ kgsl_sharedmem_set(const struct kgsl_memdesc *memdesc, unsigned int offsetbytes, BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); BUG_ON(offsetbytes + sizebytes > memdesc->size); - kgsl_cffdump_setmem(memdesc->physaddr + offsetbytes, value, + kgsl_cffdump_setmem(memdesc->gpuaddr + offsetbytes, value, sizebytes); memset(memdesc->hostptr + offsetbytes, value, sizebytes); return 0; } EXPORT_SYMBOL(kgsl_sharedmem_set); + +/* + * kgsl_sharedmem_map_vma - Map a user vma to physical memory + * + * @vma - The user vma to map + * @memdesc - The memory descriptor which contains information about the + * physical memory + * + * Return: 0 on success else error code + */ +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc) +{ + unsigned long addr = vma->vm_start; + unsigned long size = vma->vm_end - vma->vm_start; + int ret, i = 0; + + if (!memdesc->sg || (size != memdesc->size) || + (memdesc->sglen != (size / PAGE_SIZE))) + return -EINVAL; + + for (; addr < vma->vm_end; addr += PAGE_SIZE, i++) { + ret = vm_insert_page(vma, addr, sg_page(&memdesc->sg[i])); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_map_vma); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h old mode 100644 new mode 100755 index 61bcf05b..20579ac9 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -16,6 +16,8 @@ #include #include +#include +#include /* * Convert a page to a physical address @@ -31,20 +33,15 @@ struct kgsl_process_private; /** Set if the memdesc describes cached memory */ #define KGSL_MEMFLAGS_CACHED 0x00000001 +/** Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMFLAGS_GLOBAL 0x00000002 -struct kgsl_memdesc_ops { - int (*vmflags)(struct kgsl_memdesc *); - int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, - struct vm_fault *); - void (*free)(struct kgsl_memdesc *memdesc); -}; +extern struct kgsl_memdesc_ops kgsl_page_alloc_ops; -extern struct kgsl_memdesc_ops kgsl_vmalloc_ops; - -int kgsl_sharedmem_vmalloc(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size); -int kgsl_sharedmem_vmalloc_user(struct kgsl_memdesc *memdesc, +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, int flags); @@ -80,6 +77,68 @@ void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); int kgsl_sharedmem_init_sysfs(void); void kgsl_sharedmem_uninit_sysfs(void); +static inline unsigned int kgsl_get_sg_pa(struct scatterlist *sg) +{ + /* + * Try 
sg_dma_address first to support ion carveout + * regions which do not work with sg_phys(). + */ + unsigned int pa = sg_dma_address(sg); + if (pa == 0) + pa = sg_phys(sg); + return pa; +} + +int +kgsl_sharedmem_map_vma(struct vm_area_struct *vma, + const struct kgsl_memdesc *memdesc); + +/* + * For relatively small sglists, it is preferable to use kzalloc + * rather than going down the vmalloc rat hole. If the size of + * the sglist is < PAGE_SIZE use kzalloc otherwise fallback to + * vmalloc + */ + +static inline void *kgsl_sg_alloc(unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL); + else + return vmalloc(sglen * sizeof(struct scatterlist)); +} + +static inline void kgsl_sg_free(void *ptr, unsigned int sglen) +{ + if ((sglen * sizeof(struct scatterlist)) < PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); +} + +static inline int +memdesc_sg_phys(struct kgsl_memdesc *memdesc, + unsigned int physaddr, unsigned int size) +{ + memdesc->sg = kgsl_sg_alloc(1); + + kmemleak_not_leak(memdesc->sg); + + memdesc->sglen = 1; + sg_init_table(memdesc->sg, 1); + memdesc->sg[0].length = size; + memdesc->sg[0].offset = 0; + memdesc->sg[0].dma_address = physaddr; + return 0; +} + +static inline int +kgsl_allocate(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, size_t size) +{ + return kgsl_sharedmem_page_alloc(memdesc, pagetable, size); +} + static inline int memdesc_sg_phys(struct kgsl_memdesc *memdesc, unsigned int physaddr, unsigned int size) @@ -112,21 +171,13 @@ kgsl_allocate_user(struct kgsl_memdesc *memdesc, struct kgsl_pagetable *pagetable, size_t size, unsigned int flags) { -#ifdef CONFIG_MSM_KGSL_MMU - return kgsl_sharedmem_vmalloc_user(memdesc, pagetable, size, flags); -#else - return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size, flags); -#endif + return kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size, flags); } static inline int kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) { int ret = kgsl_sharedmem_alloc_coherent(memdesc, size); -#ifndef CONFIG_MSM_KGSL_MMU - if (!ret) - memdesc->gpuaddr = memdesc->physaddr; -#endif return ret; } diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c old mode 100644 new mode 100755 index e7a1d521..688f23d8 --- a/drivers/gpu/msm/z180.c +++ b/drivers/gpu/msm/z180.c @@ -100,6 +100,7 @@ enum z180_cmdwindow_type { static int z180_start(struct kgsl_device *device, unsigned int init_ram); static int z180_stop(struct kgsl_device *device); static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs); static void z180_regread(struct kgsl_device *device, @@ -382,8 +383,8 @@ static int z180_idle(struct kgsl_device *device, unsigned int timeout) if (timestamp_cmp(z180_dev->current_timestamp, z180_dev->timestamp) > 0) - status = z180_wait(device, z180_dev->current_timestamp, - timeout); + status = z180_wait(device, NULL, + z180_dev->current_timestamp, timeout); if (status) KGSL_DRV_ERR(device, "z180_waittimestamp() timed out\n"); @@ -793,14 +794,16 @@ static void z180_cmdwindow_write(struct kgsl_device *device, } static unsigned int z180_readtimestamp(struct kgsl_device *device, - enum kgsl_timestamp_type type) + struct kgsl_context *context, enum kgsl_timestamp_type type) { struct z180_device *z180_dev = Z180_DEVICE(device); + (void)context; /* get current EOP timestamp */ return z180_dev->timestamp; } static int z180_waittimestamp(struct kgsl_device *device, + 
struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -811,13 +814,14 @@ static int z180_waittimestamp(struct kgsl_device *device, msecs = 10 * MSEC_PER_SEC; mutex_unlock(&device->mutex); - status = z180_wait(device, timestamp, msecs); + status = z180_wait(device, context, timestamp, msecs); mutex_lock(&device->mutex); return status; } static int z180_wait(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, unsigned int msecs) { @@ -826,7 +830,7 @@ static int z180_wait(struct kgsl_device *device, timeout = wait_io_event_interruptible_timeout( device->wait_queue, - kgsl_check_timestamp(device, timestamp), + kgsl_check_timestamp(device, context, timestamp), msecs_to_jiffies(msecs)); if (timeout > 0) diff --git a/drivers/misc/pmem.c b/drivers/misc/pmem.c index f1523fd5..bc083e17 100755 --- a/drivers/misc/pmem.c +++ b/drivers/misc/pmem.c @@ -1,7 +1,7 @@ /* drivers/android/pmem.c * * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2009-2010, Code Aurora Forum. All rights reserved. + * Copyright (c) 2009-2012, Code Aurora Forum. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -1074,17 +1074,17 @@ static void bitmap_bits_set_all(uint32_t *bitp, int bit_start, int bit_end) static int bitmap_allocate_contiguous(uint32_t *bitp, int num_bits_to_alloc, - int total_bits, int spacing) + int total_bits, int spacing, int start_bit) { int bit_start, last_bit, word_index; if (num_bits_to_alloc <= 0) return -1; - for (bit_start = 0; ; - bit_start = (last_bit + + for (bit_start = start_bit; ; + bit_start = ((last_bit + (word_index << PMEM_32BIT_WORD_ORDER) + spacing - 1) - & ~(spacing - 1)) { + & ~(spacing - 1)) + start_bit) { int bit_end = bit_start + num_bits_to_alloc, total_words; if (bit_end > total_bits) @@ -1162,7 +1162,8 @@ static int reserve_quanta(const unsigned int quanta_needed, ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap, quanta_needed, (pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum, - spacing); + spacing, + start_bit); #if PMEM_DEBUG if (ret < 0) @@ -1915,6 +1916,13 @@ int pmem_cache_maint(struct file *file, unsigned int cmd, if (!file) return -EBADF; + /* + * check that the vaddr passed for flushing is valid + * so that you don't crash the kernel + */ + if (!pmem_addr->vaddr) + return -EINVAL; + data = file->private_data; id = get_id(file); diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c old mode 100644 new mode 100755 index 806dcad0..4fef37f7 --- a/drivers/mmc/host/msm_sdcc.c +++ b/drivers/mmc/host/msm_sdcc.c @@ -73,7 +73,7 @@ static int msmsdcc_auto_suspend(struct mmc_host *, int); #define BUSCLK_TIMEOUT (HZ) #define SQN_BUSCLK_TIMEOUT (5 * HZ) static unsigned int msmsdcc_fmin = 144000; -static unsigned int msmsdcc_fmax = 50000000; +static unsigned int msmsdcc_fmax = 64000000; static unsigned int msmsdcc_4bit = 1; static unsigned int msmsdcc_pwrsave = 1; static unsigned int msmsdcc_piopoll = 1; @@ -308,42 +308,40 @@ msmsdcc_dma_exec_func(struct msm_dmov_cmd *cmd) } static void -msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, - unsigned int result, - struct msm_dmov_errdata *err) +msmsdcc_dma_complete_tlet(unsigned long data) { - struct msmsdcc_dma_data *dma_data = - container_of(cmd, struct msmsdcc_dma_data, hdr); - struct msmsdcc_host *host = dma_data->host; + struct msmsdcc_host *host = (struct msmsdcc_host *)data; unsigned long flags; struct mmc_request 
*mrq; + struct msm_dmov_errdata err; spin_lock_irqsave(&host->lock, flags); host->dma.active = 0; + err = host->dma.err; mrq = host->curr.mrq; BUG_ON(!mrq); WARN_ON(!mrq->data); - if (!(result & DMOV_RSLT_VALID)) { + if (!(host->dma.result & DMOV_RSLT_VALID)) { pr_err("msmsdcc: Invalid DataMover result\n"); goto out; } - if (result & DMOV_RSLT_DONE) { + if (host->dma.result & DMOV_RSLT_DONE) { host->curr.data_xfered = host->curr.xfer_size; } else { /* Error or flush */ - if (result & DMOV_RSLT_ERROR) + if (host->dma.result & DMOV_RSLT_ERROR) pr_err("%s: DMA error (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (result & DMOV_RSLT_FLUSH) + mmc_hostname(host->mmc), host->dma.result); + if (host->dma.result & DMOV_RSLT_FLUSH) pr_err("%s: DMA channel flushed (0x%.8x)\n", - mmc_hostname(host->mmc), result); - if (err) + mmc_hostname(host->mmc), host->dma.result); + pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n", - err->flush[0], err->flush[1], err->flush[2], - err->flush[3], err->flush[4], err->flush[5]); + err.flush[0], err.flush[1], err.flush[2], + err.flush[3], err.flush[4], err.flush[5]); if (!mrq->data->error) mrq->data->error = -EIO; } @@ -391,6 +389,22 @@ out: return; } +static void +msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd, + unsigned int result, + struct msm_dmov_errdata *err) +{ + struct msmsdcc_dma_data *dma_data = + container_of(cmd, struct msmsdcc_dma_data, hdr); + struct msmsdcc_host *host = dma_data->host; + + dma_data->result = result; + if (err) + memcpy(&dma_data->err, err, sizeof(struct msm_dmov_errdata)); + + tasklet_schedule(&host->dma_tlet); +} + static int validate_dma(struct msmsdcc_host *host, struct mmc_data *data) { if (host->dma.channel == -1) @@ -451,14 +465,30 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) host->curr.user_pages = 0; box = &nc->cmd[0]; - for (i = 0; i < host->dma.num_ents; i++) { + + /* location of command block must be 64 bit aligned */ + BUG_ON(host->dma.cmd_busaddr & 0x07); + + nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; + host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | + DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); + host->dma.hdr.complete_func = msmsdcc_dma_complete_func; + + n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, + host->dma.num_ents, host->dma.dir); + if (n == 0) { + printk(KERN_ERR "%s: Unable to map in all sg elements\n", + mmc_hostname(host->mmc)); + host->dma.sg = NULL; + host->dma.num_ents = 0; + return -ENOMEM; + } + + for_each_sg(host->dma.sg, sg, n, i) { + box->cmd = CMD_MODE_BOX; - /* Initialize sg dma address */ - sg->dma_address = page_to_dma(mmc_dev(host->mmc), sg_page(sg)) - + sg->offset; - - if (i == (host->dma.num_ents - 1)) + if (i == n - 1) box->cmd |= CMD_LC; rows = (sg_dma_len(sg) % MCI_FIFOSIZE) ? 
(sg_dma_len(sg) / MCI_FIFOSIZE) + 1 : @@ -486,27 +516,6 @@ static int msmsdcc_config_dma(struct msmsdcc_host *host, struct mmc_data *data) box->cmd |= CMD_DST_CRCI(crci); } box++; - sg++; - } - - /* location of command block must be 64 bit aligned */ - BUG_ON(host->dma.cmd_busaddr & 0x07); - - nc->cmdptr = (host->dma.cmd_busaddr >> 3) | CMD_PTR_LP; - host->dma.hdr.cmdptr = DMOV_CMD_PTR_LIST | - DMOV_CMD_ADDR(host->dma.cmdptr_busaddr); - host->dma.hdr.complete_func = msmsdcc_dma_complete_func; - - n = dma_map_sg(mmc_dev(host->mmc), host->dma.sg, - host->dma.num_ents, host->dma.dir); -/* dsb inside dma_map_sg will write nc out to mem as well */ - - if (n != host->dma.num_ents) { - printk(KERN_ERR "%s: Unable to map in all sg elements\n", - mmc_hostname(host->mmc)); - host->dma.sg = NULL; - host->dma.num_ents = 0; - return -ENOMEM; } return 0; @@ -542,6 +551,11 @@ msmsdcc_start_command_deferred(struct msmsdcc_host *host, (cmd->opcode == 53)) *c |= MCI_CSPM_DATCMD; + if (host->prog_scan && (cmd->opcode == 12)) { + *c |= MCI_CPSM_PROGENA; + host->prog_enable = true; + } + if (cmd == cmd->mrq->stop) *c |= MCI_CSPM_MCIABORT; @@ -612,6 +626,8 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data, } dsb(); msm_dmov_enqueue_cmd_ext(host->dma.channel, &host->dma.hdr); + if (data->flags & MMC_DATA_WRITE) + host->prog_scan = true; } else { msmsdcc_writel(host, timeout, MMCIDATATIMER); @@ -701,6 +717,9 @@ msmsdcc_pio_read(struct msmsdcc_host *host, char *buffer, unsigned int remain) count += remain; }else #endif + if (remain % 4) + remain = ((remain >> 2) + 1) << 2; + while (msmsdcc_readl(host, MMCISTATUS) & MCI_RXDATAAVLBL) { *ptr = msmsdcc_readl(host, MMCIFIFO + (count % MCI_FIFOSIZE)); ptr++; @@ -737,13 +756,14 @@ msmsdcc_pio_write(struct msmsdcc_host *host, char *buffer, } else { #endif do { - unsigned int count, maxcnt; + unsigned int count, maxcnt, sz; maxcnt = status & MCI_TXFIFOEMPTY ? MCI_FIFOSIZE : MCI_FIFOHALFSIZE; count = min(remain, maxcnt); - writesl(base + MMCIFIFO, ptr, count >> 2); + sz = count % 4 ? 
(count >> 2) + 1 : (count >> 2); + writesl(base + MMCIFIFO, ptr, sz); ptr += count; remain -= count; @@ -906,8 +926,23 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status) else if (host->curr.data) { /* Non DMA */ msmsdcc_stop_data(host); msmsdcc_request_end(host, cmd->mrq); - } else /* host->data == NULL */ + } else { /* host->data == NULL */ + if (!cmd->error && host->prog_enable) { + if (status & MCI_PROGDONE) { + host->prog_scan = false; + host->prog_enable = false; msmsdcc_request_end(host, cmd->mrq); + } else { + host->curr.cmd = cmd; + } + } else { + if (host->prog_enable) { + host->prog_scan = false; + host->prog_enable = false; + } + msmsdcc_request_end(host, cmd->mrq); + } + } } else if (cmd->data) if (!(cmd->data->flags & MMC_DATA_READ)) msmsdcc_start_data(host, cmd->data, @@ -921,7 +956,7 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status, struct mmc_data *data = host->curr.data; if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL | - MCI_CMDTIMEOUT) && host->curr.cmd) { + MCI_CMDTIMEOUT | MCI_PROGDONE) && host->curr.cmd) { msmsdcc_do_cmdirq(host, status); } @@ -1265,24 +1300,6 @@ msmsdcc_init_dma(struct msmsdcc_host *host) return 0; } -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ -static void -do_resume_work(struct work_struct *work) -{ - struct msmsdcc_host *host = - container_of(work, struct msmsdcc_host, resume_task); - struct mmc_host *mmc = host->mmc; - - if (mmc) { - mmc_resume_host(mmc); - if (host->stat_irq) - enable_irq(host->stat_irq); - } -} - -#endif - - #ifdef CONFIG_HAS_EARLYSUSPEND static void msmsdcc_early_suspend(struct early_suspend *h) { @@ -1382,14 +1399,8 @@ msmsdcc_probe(struct platform_device *pdev) host->dmares = dmares; spin_lock_init(&host->lock); -#ifdef CONFIG_MMC_EMBEDDED_SDIO - if (plat->embedded_sdio) - mmc_set_embedded_sdio_data(mmc, - &plat->embedded_sdio->cis, - &plat->embedded_sdio->cccr, - plat->embedded_sdio->funcs, - plat->embedded_sdio->num_funcs); -#endif + tasklet_init(&host->dma_tlet, msmsdcc_dma_complete_tlet, + (unsigned long)host); /* * Setup DMA @@ -1608,22 +1619,14 @@ msmsdcc_resume(struct platform_device *dev) msmsdcc_writel(host, host->saved_irq0mask, MMCIMASK0); - if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) { -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - schedule_work(&host->resume_task); -#else + if (mmc->card && mmc->card->type != MMC_TYPE_SDIO) mmc_resume_host(mmc); -#endif - } - if (host->stat_irq) enable_irq(host->stat_irq); - #if BUSCLK_PWRSAVE if (host->clks_on) msmsdcc_disable_clocks(host, 1); #endif - } return 0; } diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h old mode 100644 new mode 100755 index fdb0b9c6..1368fc0c --- a/drivers/mmc/host/msm_sdcc.h +++ b/drivers/mmc/host/msm_sdcc.h @@ -155,7 +155,7 @@ #define MCI_IRQENABLE \ (MCI_CMDCRCFAILMASK|MCI_DATACRCFAILMASK|MCI_CMDTIMEOUTMASK| \ MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK| \ - MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK) + MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK|MCI_PROGDONEMASK) /* * The size of the FIFO in bytes. 
@@ -164,7 +164,7 @@ #define MCI_FIFOHALFSIZE (MCI_FIFOSIZE / 2) -#define NR_SG 32 +#define NR_SG 128 struct clk; @@ -190,7 +190,7 @@ struct msmsdcc_dma_data { int busy; /* Set if DM is busy */ int active; unsigned int result; - struct msm_dmov_errdata *err; + struct msm_dmov_errdata err; }; struct msmsdcc_pio_data { @@ -258,17 +258,12 @@ struct msmsdcc_host { int polling_enabled; #endif -#ifdef CONFIG_MMC_MSM7X00A_RESUME_IN_WQ - struct work_struct resume_task; -#endif struct tasklet_struct dma_tlet; #ifdef CONFIG_MMC_AUTO_SUSPEND unsigned long suspended; #endif - unsigned int prog_scan; - unsigned int prog_enable; /* Command parameters */ unsigned int cmd_timeout; unsigned int cmd_pio_irqmask; @@ -279,6 +274,8 @@ struct msmsdcc_host { unsigned int dummy_52_needed; unsigned int dummy_52_state; + bool prog_scan; + bool prog_enable; }; #endif diff --git a/drivers/mtd/devices/htcleo_nand.c b/drivers/mtd/devices/htcleo_nand.c index 9b23d680..12a9b390 100755 --- a/drivers/mtd/devices/htcleo_nand.c +++ b/drivers/mtd/devices/htcleo_nand.c @@ -51,7 +51,7 @@ unsigned crci_mask; #include "msm_nand.h" -#define MSM_NAND_DMA_BUFFER_SIZE SZ_4K +#define MSM_NAND_DMA_BUFFER_SIZE SZ_1M #define MSM_NAND_DMA_BUFFER_SLOTS \ (MSM_NAND_DMA_BUFFER_SIZE / (sizeof(((atomic_t *)0)->counter) * 8)) diff --git a/drivers/power/ds2746_battery.c b/drivers/power/ds2746_battery.c index 5f071a39..19e97980 100644 --- a/drivers/power/ds2746_battery.c +++ b/drivers/power/ds2746_battery.c @@ -46,6 +46,8 @@ Original Auther: /*#include "../w1/slaves/w1_ds2784.h"*/ #include #include +#include +#include struct ds2746_device_info { @@ -64,6 +66,45 @@ struct ds2746_device_info { }; static struct wake_lock vbus_wake_lock; +/* + * proc_fs interface for fast charge + * by marc1706 + */ +#define PROC_FAST_CHARGE_NAME "fast_charge" + +static struct proc_dir_entry *fast_charge; +static int allow_fast_charge = 1; + +static int proc_read_fast_charge(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int ret; + + ret = sprintf(page, "%i\n", allow_fast_charge); + + return ret; +} + +static int proc_write_fast_charge(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char temp_buff[count + 1]; + int ret; + int len = count; + + if (copy_from_user(temp_buff, buffer, len)) + return -EFAULT; + + sscanf(temp_buff, "%i", &ret); + + if (!ret || ret == 1) + allow_fast_charge = ret; + else + printk(KERN_ALERT "%s: Incorrect value:%i\n", __func__, ret); + + return ret; +} + /*======================================================================================== HTC power algorithm helper member and functions @@ -279,10 +320,18 @@ static BOOL is_charging_avaiable(void) static BOOL is_high_current_charging_avaialable(void) { - if (!poweralg.protect_flags.is_charging_high_current_avaialble) return FALSE; - //if (!poweralg.is_china_ac_in) return FALSE; /* allow high current charging on china chargers */ - if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) return FALSE; - return TRUE; + bool ret; + + if (!poweralg.protect_flags.is_charging_high_current_avaialble) + ret = FALSE; + else if (!poweralg.is_china_ac_in && !allow_fast_charge) + ret = FALSE; + else if (poweralg.charge_state == CHARGE_STATE_UNKNOWN) + ret = FALSE; + else + ret = TRUE; + + return ret; } static void update_next_charge_state(void) @@ -1245,6 +1294,19 @@ static int __init ds2746_battery_init(void) return ret; } + fast_charge = create_proc_entry(PROC_FAST_CHARGE_NAME, 0644, NULL); + + if (fast_charge == NULL) { + 
remove_proc_entry(PROC_FAST_CHARGE_NAME, NULL); + printk(KERN_ALERT "%s: Unable to create /proc/%s\n", __func__, + PROC_FAST_CHARGE_NAME); + } + fast_charge->read_proc = proc_read_fast_charge; + fast_charge->write_proc = proc_write_fast_charge; + fast_charge->uid = 0; + fast_charge->gid = 0; + printk(KERN_INFO "/proc/%s created\n", PROC_FAST_CHARGE_NAME); + /*mutex_init(&htc_batt_info.lock);*/ return platform_driver_register(&ds2746_battery_driver); } diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig old mode 100644 new mode 100755 index 8ee4bfa6..b8964347 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -125,5 +125,7 @@ source "drivers/staging/iio/Kconfig" source "drivers/staging/zram/Kconfig" +source "drivers/staging/snappy/Kconfig" + endif # !STAGING_EXCLUDE_BUILD endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile old mode 100644 new mode 100755 index 5a1b7341..621dc916 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -45,4 +45,5 @@ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_XVMALLOC) += zram/ - +obj-$(CONFIG_SNAPPY_COMPRESS) += snappy/ +obj-$(CONFIG_SNAPPY_DECOMPRESS) += snappy/ diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index a0763da9..7453f1b0 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -31,14 +31,15 @@ #include #include #include -#include #include #include +#include #include "binder.h" static DEFINE_MUTEX(binder_lock); static DEFINE_MUTEX(binder_deferred_lock); +static DEFINE_MUTEX(binder_mmap_lock); static HLIST_HEAD(binder_procs); static HLIST_HEAD(binder_deferred_list); @@ -103,7 +104,7 @@ enum { static uint32_t binder_debug_mask; module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO); -static int binder_debug_no_lock; +static bool binder_debug_no_lock; module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -258,7 +259,7 @@ struct binder_ref { }; struct binder_buffer { - struct list_head entry; /* free and allocated entries by addesss */ + struct list_head entry; /* free and allocated entries by address */ struct rb_node rb_node; /* free entry by size or allocated entry */ /* by address */ unsigned free:1; @@ -288,6 +289,7 @@ struct binder_proc { struct rb_root refs_by_node; int pid; struct vm_area_struct *vma; + struct mm_struct *vma_vm_mm; struct task_struct *tsk; struct files_struct *files; struct hlist_node deferred_work_node; @@ -380,8 +382,7 @@ int task_get_unused_fd_flags(struct binder_proc *proc, int flags) repeat: fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); + fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, files->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -633,6 +634,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, if (mm) { down_write(&mm->mmap_sem); vma = proc->vma; + if (vma && mm != proc->vma_vm_mm) { + pr_err("binder: %d: vma mm and task mm mismatch\n", + proc->pid); + vma = NULL; + } } if (allocate == 0) @@ -651,7 +657,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; BUG_ON(*page); - *page = alloc_page(GFP_KERNEL | __GFP_ZERO); + *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (*page == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: %d: binder_alloc_buf failed " @@ -1283,7 +1289,8 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: reply failed, target " "thread, %d:%d, has error code %d " - "already\n", target_thread->proc->pid, + "already\n", + target_thread->proc->pid, target_thread->pid, target_thread->return_error); } @@ -1338,9 +1345,9 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, buffer->data_size < sizeof(*fp) || !IS_ALIGNED(*offp, sizeof(void *))) { binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: transaction release %d bad" - "offset %zd, size %zd\n", debug_id, - *offp, buffer->data_size); + "binder: transaction release %d bad" + "offset %zd, size %zd\n", debug_id, + *offp, buffer->data_size); continue; } fp = (struct flat_binder_object *)(buffer->data + *offp); @@ -1352,7 +1359,9 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (node == NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: transaction release %d" - " bad node %p\n", debug_id, fp->binder); + " bad node %p\n", + debug_id, + fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, @@ -2272,6 +2281,7 @@ retry: if (put_user(thread->return_error2, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error2); if (ptr == end) goto done; thread->return_error2 = BR_OK; @@ -2279,6 +2289,7 @@ retry: if (put_user(thread->return_error, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); + binder_stat_br(proc, thread, thread->return_error); thread->return_error = BR_OK; goto done; } @@ -2434,6 +2445,7 @@ retry: if (put_user(death->cookie, (void * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); + binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION, "binder: %d:%d %s %p\n", proc->pid, thread->pid, @@ -2541,6 +2553,7 @@ done: proc->pid, thread->pid); if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer)) return -EFAULT; + binder_stat_br(proc, thread, BR_SPAWN_LOOPER); } return 0; } @@ -2556,14 +2569,38 @@ static void binder_release_work(struct list_head *list) struct binder_transaction *t; t = container_of(w, struct binder_transaction, work); - if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) + if (t->buffer->target_node && + !(t->flags & TF_ONE_WAY)) { binder_send_failed_reply(t, BR_DEAD_REPLY); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered transaction %d\n", + t->debug_id); + t->buffer->transaction = NULL; + kfree(t); + binder_stats_deleted(BINDER_STAT_TRANSACTION); + } } break; case BINDER_WORK_TRANSACTION_COMPLETE: { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered TRANSACTION_COMPLETE\n"); kfree(w); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); } break; + case 
BINDER_WORK_DEAD_BINDER_AND_CLEAR: + case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: { + struct binder_ref_death *death; + + death = container_of(w, struct binder_ref_death, work); + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "binder: undelivered death notification, %p\n", + death->cookie); + kfree(death); + binder_stats_deleted(BINDER_STAT_DEATH); + } break; default: + pr_err("binder: unexpected work type, %d, not freed\n", + w->type); break; } } @@ -2684,8 +2721,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int size = _IOC_SIZE(cmd); void __user *ubuf = (void __user *)arg; - /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_ioctl: %d:%d %x %lx\n", - proc->pid, current->pid, cmd, arg);*/ + /*printk(KERN_INFO "binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/ ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); @@ -2755,6 +2791,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (binder_context_mgr_node != NULL) { binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder: BINDER_SET_CONTEXT_MGR already set\n"); + "binder: BINDER_SET_CONTEXT_MGR already set\n"); ret = -EBUSY; goto err; } @@ -2808,8 +2845,8 @@ err: wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -ERESTARTSYS) binder_debug(BINDER_DEBUG_TOP_ERRORS, - "binder: %d:%d ioctl %x %lx returned %d\n", - proc->pid, current->pid, cmd, arg, ret); + "binder: %d:%d ioctl %x %lx returned %d\n", + proc->pid, current->pid, cmd, arg, ret); return ret; } @@ -2821,7 +2858,6 @@ static void binder_vma_open(struct vm_area_struct *vma) proc->pid, vma->vm_start, vma->vm_end, (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); - dump_stack(); } static void binder_vma_close(struct vm_area_struct *vma) @@ -2833,6 +2869,7 @@ static void binder_vma_close(struct vm_area_struct *vma) (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags, (unsigned long)pgprot_val(vma->vm_page_prot)); proc->vma = NULL; + proc->vma_vm_mm = NULL; binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES); } @@ -2865,6 +2902,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE; + mutex_lock(&binder_mmap_lock); if (proc->buffer) { ret = -EBUSY; failure_string = "already mapped"; @@ -2879,6 +2917,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) } proc->buffer = area->addr; proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer; + mutex_unlock(&binder_mmap_lock); #ifdef CONFIG_CPU_CACHE_VIPT if (cache_is_vipt_aliasing()) { @@ -2913,8 +2952,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) binder_insert_free_buffer(proc, buffer); proc->free_async_space = proc->buffer_size / 2; barrier(); - proc->files = get_files_struct(current); + proc->files = get_files_struct(proc->tsk); proc->vma = vma; + proc->vma_vm_mm = vma->vm_mm; /*binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_mmap: %d %lx-%lx maps %p\n", @@ -2925,14 +2965,17 @@ err_alloc_small_buf_failed: kfree(proc->pages); proc->pages = NULL; err_alloc_pages_failed: + mutex_lock(&binder_mmap_lock); vfree(proc->buffer); proc->buffer = NULL; err_get_vm_area_failed: err_already_mapped: + mutex_unlock(&binder_mmap_lock); err_bad_arg: binder_debug(BINDER_DEBUG_TOP_ERRORS, "binder_mmap: %d %lx-%lx %s failed %d\n", - proc->pid, vma->vm_start, vma->vm_end, failure_string, ret); + 
proc->pid, vma->vm_start, vma->vm_end, failure_string, + ret); return ret; } @@ -3039,6 +3082,7 @@ static void binder_deferred_release(struct binder_proc *proc) nodes++; rb_erase(&node->rb_node, &proc->nodes); list_del_init(&node->work.entry); + binder_release_work(&node->async_todo); if (hlist_empty(&node->refs)) { kfree(node); binder_stats_deleted(BINDER_STAT_NODE); @@ -3077,6 +3121,7 @@ static void binder_deferred_release(struct binder_proc *proc) binder_delete_ref(ref); } binder_release_work(&proc->todo); + binder_release_work(&proc->delivered_death); buffers = 0; while ((n = rb_first(&proc->allocated_buffers))) { diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c old mode 100644 new mode 100755 index 1a0c1391..002e11ae --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -37,7 +37,7 @@ * mutex 'mutex'. */ struct logger_log { - unsigned char *buffer;/* the ring buffer itself */ + unsigned char *buffer;/* the ring buffer itself */ struct miscdevice misc; /* misc device representing the log */ wait_queue_head_t wq; /* wait queue for readers */ struct list_head readers; /* this log's readers */ @@ -57,19 +57,25 @@ struct logger_reader { struct logger_log *log; /* associated log */ struct list_head list; /* entry in logger_log's list */ size_t r_off; /* current read head offset */ + bool r_all; /* reader can read all entries */ + int r_ver; /* reader ABI version */ }; /* logger_offset - returns index 'n' into the log via (optimized) modulus */ -#define logger_offset(n) ((n) & (log->size - 1)) +size_t logger_offset(struct logger_log *log, size_t n) +{ + return n & (log->size-1); +} + /* * file_get_log - Given a file structure, return the associated log * * This isn't aesthetic. We have several goals: * - * 1) Need to quickly obtain the associated log during an I/O operation - * 2) Readers need to maintain state (logger_reader) - * 3) Writers need to be very fast (open() should be a near no-op) + * 1) Need to quickly obtain the associated log during an I/O operation + * 2) Readers need to maintain state (logger_reader) + * 3) Writers need to be very fast (open() should be a near no-op) * * In the reader case, we can trivially go file->logger_reader->logger_log. * For a writer, we don't want to maintain a logger_reader, so we just go @@ -86,25 +92,75 @@ static inline struct logger_log *file_get_log(struct file *file) } /* - * get_entry_len - Grabs the length of the payload of the next entry starting - * from 'off'. + * get_entry_header - returns a pointer to the logger_entry header within + * 'log' starting at offset 'off'. A temporary logger_entry 'scratch' must + * be provided. Typically the return value will be a pointer within + * 'logger->buf'. However, a pointer to 'scratch' may be returned if + * the log entry spans the end and beginning of the circular buffer. + */ +static struct logger_entry *get_entry_header(struct logger_log *log, + size_t off, struct logger_entry *scratch) +{ + size_t len = min(sizeof(struct logger_entry), log->size - off); + if (len != sizeof(struct logger_entry)) { + memcpy(((void *) scratch), log->buffer + off, len); + memcpy(((void *) scratch) + len, log->buffer, + sizeof(struct logger_entry) - len); + return scratch; + } + + return (struct logger_entry *) (log->buffer + off); +} + +/* + * get_entry_msg_len - Grabs the length of the message of the entry + * starting from from 'off'. + * + * An entry length is 2 bytes (16 bits) in host endian order. 
+ * In the log, the length does not include the size of the log entry structure. + * This function returns the size including the log entry structure. * * Caller needs to hold log->mutex. */ -static __u32 get_entry_len(struct logger_log *log, size_t off) +static __u32 get_entry_msg_len(struct logger_log *log, size_t off) { - __u16 val; + struct logger_entry scratch; + struct logger_entry *entry; - switch (log->size - off) { - case 1: - memcpy(&val, log->buffer + off, 1); - memcpy(((char *) &val) + 1, log->buffer, 1); - break; - default: - memcpy(&val, log->buffer + off, 2); + entry = get_entry_header(log, off, &scratch); + return entry->len; +} + +static size_t get_user_hdr_len(int ver) +{ + if (ver < 2) + return sizeof(struct user_logger_entry_compat); + else + return sizeof(struct logger_entry); +} + +static ssize_t copy_header_to_user(int ver, struct logger_entry *entry, + char __user *buf) +{ + void *hdr; + size_t hdr_len; + struct user_logger_entry_compat v1; + + if (ver < 2) { + v1.len = entry->len; + v1.__pad = 0; + v1.pid = entry->pid; + v1.tid = entry->tid; + v1.sec = entry->sec; + v1.nsec = entry->nsec; + hdr = &v1; + hdr_len = sizeof(struct user_logger_entry_compat); + } else { + hdr = entry; + hdr_len = sizeof(struct logger_entry); } - return sizeof(struct logger_entry) + val; + return copy_to_user(buf, hdr, hdr_len); } /* @@ -118,15 +174,31 @@ static ssize_t do_read_log_to_user(struct logger_log *log, char __user *buf, size_t count) { + struct logger_entry scratch; + struct logger_entry *entry; size_t len; + size_t msg_start; /* - * We read from the log in two disjoint operations. First, we read from - * the current read head offset up to 'count' bytes or to the end of + * First, copy the header to userspace, using the version of + * the header requested + */ + entry = get_entry_header(log, reader->r_off, &scratch); + if (copy_header_to_user(reader->r_ver, entry, buf)) + return -EFAULT; + + count -= get_user_hdr_len(reader->r_ver); + buf += get_user_hdr_len(reader->r_ver); + msg_start = logger_offset(log, + reader->r_off + sizeof(struct logger_entry)); + + /* + * We read from the msg in two disjoint operations. First, we read from + * the current msg head offset up to 'count' bytes or to the end of * the log, whichever comes first. 
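That two-step copy is the usual ring-buffer read pattern: one chunk up to the end of the backing array, then an optional second chunk from offset zero. A minimal standalone sketch of the same idea (hypothetical names, userspace, no locking):

    #include <stdio.h>
    #include <string.h>

    /* Copy 'count' bytes out of a ring buffer of 'size' bytes starting at
     * offset 'off', in at most two memcpy() calls, the same way
     * do_read_log_to_user() splits the copy when a message wraps. */
    static void ring_read(const unsigned char *buf, size_t size, size_t off,
                          unsigned char *dst, size_t count)
    {
        size_t len = count < size - off ? count : size - off;

        memcpy(dst, buf + off, len);             /* up to the end of the buffer */
        if (count != len)
            memcpy(dst + len, buf, count - len); /* wrapped remainder */
    }

    int main(void)
    {
        unsigned char log[8] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' };
        unsigned char out[4];

        ring_read(log, sizeof(log), 6, out, 4);
        printf("%.4s\n", out);                   /* prints "GHAB" */
        return 0;
    }

In the driver the same split happens twice per entry: once for the header, via get_entry_header() and its scratch copy, and once for the payload, after which r_off is advanced with logger_offset().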
*/ - len = min(count, log->size - reader->r_off); - if (copy_to_user(buf, log->buffer + reader->r_off, len)) + len = min(count, log->size - msg_start); + if (copy_to_user(buf, log->buffer + msg_start, len)) return -EFAULT; /* @@ -137,9 +209,34 @@ static ssize_t do_read_log_to_user(struct logger_log *log, if (copy_to_user(buf + len, log->buffer, count - len)) return -EFAULT; - reader->r_off = logger_offset(reader->r_off + count); + reader->r_off = logger_offset(log, reader->r_off + + sizeof(struct logger_entry) + count); - return count; + return count + get_user_hdr_len(reader->r_ver); +} + +/* + * get_next_entry_by_uid - Starting at 'off', returns an offset into + * 'log->buffer' which contains the first entry readable by 'euid' + */ +static size_t get_next_entry_by_uid(struct logger_log *log, + size_t off, uid_t euid) +{ + while (off != log->w_off) { + struct logger_entry *entry; + struct logger_entry scratch; + size_t next_len; + + entry = get_entry_header(log, off, &scratch); + + if (entry->euid == euid) + return off; + + next_len = sizeof(struct logger_entry) + entry->len; + off = logger_offset(log, off + next_len); + } + + return off; } /* @@ -147,11 +244,11 @@ static ssize_t do_read_log_to_user(struct logger_log *log, * * Behavior: * - * - O_NONBLOCK works - * - If there are no log entries to read, blocks until log is written to - * - Atomically reads exactly one log entry + * - O_NONBLOCK works + * - If there are no log entries to read, blocks until log is written to + * - Atomically reads exactly one log entry * - * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read + * Will set errno to EINVAL if read * buffer is insufficient to hold next entry. */ static ssize_t logger_read(struct file *file, char __user *buf, @@ -164,9 +261,10 @@ static ssize_t logger_read(struct file *file, char __user *buf, start: while (1) { + mutex_lock(&log->mutex); + prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&log->mutex); ret = (log->w_off == reader->r_off); mutex_unlock(&log->mutex); if (!ret) @@ -191,6 +289,10 @@ start: mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + /* is there still something to read or did we race? */ if (unlikely(log->w_off == reader->r_off)) { mutex_unlock(&log->mutex); @@ -198,7 +300,8 @@ start: } /* get the size of the next entry */ - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); if (count < ret) { ret = -EINVAL; goto out; @@ -224,8 +327,9 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) size_t count = 0; do { - size_t nr = get_entry_len(log, off); - off = logger_offset(off + nr); + size_t nr = sizeof(struct logger_entry) + + get_entry_msg_len(log, off); + off = logger_offset(log, off + nr); count += nr; } while (count < len); @@ -233,16 +337,28 @@ static size_t get_next_entry(struct logger_log *log, size_t off, size_t len) } /* - * clock_interval - is a < c < b in mod-space? Put another way, does the line - * from a to b cross c? 
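The renamed helper below, is_between(), answers that question while letting all three offsets wrap around the buffer. A small self-contained check of its behaviour (an illustrative mirror, outside kernel context):

    #include <stdio.h>
    #include <stddef.h>

    /* Same test as the is_between() helper below: is c in the half-open
     * circular interval (a, b]? */
    static int is_between(size_t a, size_t b, size_t c)
    {
        if (a < b)
            return a < c && c <= b;   /* ordinary, non-wrapping interval */
        return c <= b || a < c;       /* interval wraps past the buffer end */
    }

    int main(void)
    {
        /* A write moves w_off from 6 to 2 in an 8-byte log, i.e. it wraps.
         * A reader parked at offset 0 is overtaken, one at offset 4 is not. */
        printf("%d %d\n", is_between(6, 2, 0), is_between(6, 2, 4)); /* 1 0 */
        return 0;
    }

fix_up_readers() uses the same predicate to detect readers whose offsets were overtaken by an incoming write and pushes them forward to the next entry.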
+ * is_between - is a < c < b, accounting for wrapping of a, b, and c + * positions in the buffer + * + * That is, if a<b, check for c between a and b, and if a>b, check for c outside (not between) a and b + * + * |------- a xxxxxxxx b --------| + * c^ + * + * |xxxxx b --------- a xxxxxxxxx| + * c^ + * or c^ */ -static inline int clock_interval(size_t a, size_t b, size_t c) +static inline int is_between(size_t a, size_t b, size_t c) { - if (b < a) { - if (a < c || b >= c) + if (a < b) { + /* is c between a and b? */ + if (a < c && c <= b) return 1; } else { - if (a < c && b >= c) + /* is c outside of b through a? */ + if (c <= b || a < c) return 1; } @@ -260,14 +376,14 @@ static inline int clock_interval(size_t a, size_t b, size_t c) static void fix_up_readers(struct logger_log *log, size_t len) { size_t old = log->w_off; - size_t new = logger_offset(old + len); + size_t new = logger_offset(log, old + len); struct logger_reader *reader; - if (clock_interval(old, new, log->head)) + if (is_between(old, new, log->head)) log->head = get_next_entry(log, log->head, len); list_for_each_entry(reader, &log->readers, list) - if (clock_interval(old, new, reader->r_off)) + if (is_between(old, new, reader->r_off)) reader->r_off = get_next_entry(log, reader->r_off, len); } @@ -286,7 +402,7 @@ static void do_write_log(struct logger_log *log, const void *buf, size_t count) if (count != len) memcpy(log->buffer, buf + len, count - len); - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); } @@ -309,9 +425,15 @@ static ssize_t do_write_log_from_user(struct logger_log *log, if (count != len) if (copy_from_user(log->buffer, buf + len, count - len)) + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments.
+ */ return -EFAULT; - log->w_off = logger_offset(log->w_off + count); + log->w_off = logger_offset(log, log->w_off + count); return count; } @@ -336,7 +458,9 @@ ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.tid = current->pid; header.sec = now.tv_sec; header.nsec = now.tv_nsec; + header.euid = current_euid(); header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); + header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ if (unlikely(!header.len)) @@ -409,6 +533,10 @@ static int logger_open(struct inode *inode, struct file *file) return -ENOMEM; reader->log = log; + reader->r_ver = 1; + reader->r_all = in_egroup_p(inode->i_gid) || + capable(CAP_SYSLOG); + INIT_LIST_HEAD(&reader->list); mutex_lock(&log->mutex); @@ -466,6 +594,10 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) poll_wait(file, &log->wq, wait); mutex_lock(&log->mutex); + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) ret |= POLLIN | POLLRDNORM; mutex_unlock(&log->mutex); @@ -473,11 +605,25 @@ static unsigned int logger_poll(struct file *file, poll_table *wait) return ret; } +static long logger_set_version(struct logger_reader *reader, void __user *arg) +{ + int version; + if (copy_from_user(&version, arg, sizeof(int))) + return -EFAULT; + + if ((version < 1) || (version > 2)) + return -EINVAL; + + reader->r_ver = version; + return 0; +} + static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct logger_log *log = file_get_log(file); struct logger_reader *reader; - long ret = -ENOTTY; + long ret = -EINVAL; + void __user *argp = (void __user *) arg; mutex_lock(&log->mutex); @@ -502,8 +648,14 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; } reader = file->private_data; + + if (!reader->r_all) + reader->r_off = get_next_entry_by_uid(log, + reader->r_off, current_euid()); + if (log->w_off != reader->r_off) - ret = get_entry_len(log, reader->r_off); + ret = get_user_hdr_len(reader->r_ver) + + get_entry_msg_len(log, reader->r_off); else ret = 0; break; @@ -517,6 +669,22 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) log->head = log->w_off; ret = 0; break; + case LOGGER_GET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = reader->r_ver; + break; + case LOGGER_SET_VERSION: + if (!(file->f_mode & FMODE_READ)) { + ret = -EBADF; + break; + } + reader = file->private_data; + ret = logger_set_version(reader, argp); + break; } mutex_unlock(&log->mutex); @@ -537,8 +705,8 @@ static const struct file_operations logger_fops = { /* * Defines a log structure with name 'NAME' and a size of 'SIZE' bytes, which - * must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less than - * LONG_MAX minus LOGGER_ENTRY_MAX_LEN. + * must be a power of two, and greater than + * (LOGGER_ENTRY_MAX_PAYLOAD + sizeof(struct logger_entry)). */ #define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \ static unsigned char _buf_ ## VAR[SIZE]; \ diff --git a/drivers/staging/android/logger.h b/drivers/staging/android/logger.h old mode 100644 new mode 100755 index 2cb06e9d..3f612a3b --- a/drivers/staging/android/logger.h +++ b/drivers/staging/android/logger.h @@ -20,7 +20,12 @@ #include #include -struct logger_entry { +/* + * The userspace structure for version 1 of the logger_entry ABI. 
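Version 1 is what existing clients continue to see; a reader that wants the extended version 2 header switches over with the LOGGER_SET_VERSION ioctl handled above. A rough userspace sketch follows; the device path, the error handling, and the local mirror of the v2 structure are assumptions made for the example:

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    /* Local mirror of the v2 entry header defined in logger.h below. */
    struct logger_entry_v2 {
        uint16_t len;        /* payload length */
        uint16_t hdr_size;   /* size of this header */
        int32_t  pid, tid;   /* writer's pid and tid */
        int32_t  sec, nsec;  /* timestamp */
        uint32_t euid;       /* effective UID of the writer */
        char     msg[0];     /* payload follows */
    };

    #define __LOGGERIO          0xAE
    #define LOGGER_SET_VERSION  _IO(__LOGGERIO, 6)

    int main(void)
    {
        int fd = open("/dev/log/main", O_RDONLY);  /* assumed device node */
        int ver = 2;
        char buf[5 * 1024];
        ssize_t n;

        if (fd < 0 || ioctl(fd, LOGGER_SET_VERSION, &ver) < 0)
            return 1;                    /* old kernel: reader stays on v1 */

        n = read(fd, buf, sizeof(buf));  /* one whole entry per read() */
        if (n > 0) {
            struct logger_entry_v2 *e = (struct logger_entry_v2 *)buf;
            printf("pid=%d euid=%u len=%u\n", e->pid, e->euid, e->len);
        }
        close(fd);
        return 0;
    }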
+ * This structure is returned to userspace unless the caller requests + * an upgrade to a newer ABI version. + */ +struct user_logger_entry_compat { __u16 len; /* length of the payload */ __u16 __pad; /* no matter what, we get 2 bytes of padding */ __s32 pid; /* generating process's pid */ @@ -30,14 +35,28 @@ struct logger_entry { char msg[0]; /* the entry's payload */ }; +/* + * The structure for version 2 of the logger_entry ABI. + * This structure is returned to userspace if ioctl(LOGGER_SET_VERSION) + * is called with version >= 2 + */ +struct logger_entry { + __u16 len; /* length of the payload */ + __u16 hdr_size; /* sizeof(struct logger_entry_v2) */ + __s32 pid; /* generating process's pid */ + __s32 tid; /* generating process's tid */ + __s32 sec; /* seconds since Epoch */ + __s32 nsec; /* nanoseconds */ + uid_t euid; /* effective UID of logger */ + char msg[0]; /* the entry's payload */ +}; + #define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */ #define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */ #define LOGGER_LOG_SYSTEM "log_system" /* system/framework messages */ #define LOGGER_LOG_MAIN "log_main" /* everything else */ -#define LOGGER_ENTRY_MAX_LEN (4*1024) -#define LOGGER_ENTRY_MAX_PAYLOAD \ - (LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry)) +#define LOGGER_ENTRY_MAX_PAYLOAD 4076 #define __LOGGERIO 0xAE @@ -45,5 +64,7 @@ struct logger_entry { #define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */ #define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */ #define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */ +#define LOGGER_GET_VERSION _IO(__LOGGERIO, 5) /* abi version */ +#define LOGGER_SET_VERSION _IO(__LOGGERIO, 6) /* abi version */ #endif /* _LINUX_LOGGER_H */ diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c old mode 100644 new mode 100755 index 42cd93ea..f8017368 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -29,12 +29,22 @@ * */ -#include #include +#include +#include +#include #include +#include +#include #include #include -#include +#include +#include + +#ifdef CONFIG_SWAP +#include +#include +#endif static uint32_t lowmem_debug_level = 2; static int lowmem_adj[6] = { @@ -52,8 +62,16 @@ static size_t lowmem_minfree[6] = { }; static int lowmem_minfree_size = 4; +static size_t lowmem_minfree_notif_trigger; + +static unsigned int offlining; static struct task_struct *lowmem_deathpending; -static DEFINE_SPINLOCK(lowmem_deathpending_lock); +static unsigned long lowmem_deathpending_timeout; +static struct kobject *lowmem_kobj; + +#ifdef CONFIG_SWAP +static int fudgeswap = 512; +#endif #define lowmem_print(level, x...) 
\ do { \ @@ -85,12 +103,73 @@ task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; - if (task == lowmem_deathpending) { - schedule_work(&task_free_work); - } + if (task == lowmem_deathpending) + lowmem_deathpending = NULL; + return NOTIFY_OK; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int lmk_hotplug_callback(struct notifier_block *self, + unsigned long cmd, void *data) +{ + switch (cmd) { + /* Don't care LMK cases */ + case MEM_ONLINE: + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + case MEM_GOING_ONLINE: + offlining = 0; + lowmem_print(4, "lmk in normal mode\n"); + break; + /* LMK should account for movable zone */ + case MEM_GOING_OFFLINE: + offlining = 1; + lowmem_print(4, "lmk in hotplug mode\n"); + break; + } + return NOTIFY_DONE; +} +#endif + + + +static void lowmem_notify_killzone_approach(void); + +static inline void get_free_ram(int *other_free, int *other_file) +{ + struct zone *zone; + *other_free = global_page_state(NR_FREE_PAGES); + *other_file = global_page_state(NR_FILE_PAGES) - + global_page_state(NR_SHMEM); +#ifdef CONFIG_SWAP + if(fudgeswap != 0){ + struct sysinfo si; + si_swapinfo(&si); + + if(si.freeswap > 0){ + if(fudgeswap > si.freeswap) + other_file += si.freeswap; + else + other_file += fudgeswap; + } + } +#endif + if (offlining) { + /* Discount all free space in the section being offlined */ + for_each_zone(zone) { + if (zone_idx(zone) == ZONE_MOVABLE) { + *other_free -= zone_page_state(zone, + NR_FREE_PAGES); + lowmem_print(4, "lowmem_shrink discounted " + "%lu pages in movable zone\n", + zone_page_state(zone, NR_FREE_PAGES)); + } + } + } +} + static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) { struct task_struct *p; @@ -102,10 +181,8 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); - int other_free = global_page_state(NR_FREE_PAGES); - int other_file = global_page_state(NR_FILE_PAGES); - unsigned long flags; - + int other_free; + int other_file; /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan @@ -113,15 +190,24 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) * this pass. 
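Past that bail-out, the selection loop further down pairs each lowmem_minfree[] threshold with a lowmem_adj[] value; after this patch both the free-page count and the file-cache count must drop below a threshold before its adj level becomes eligible. A standalone illustration, using what are believed to be the driver's stock defaults (values in 4 KiB pages, an assumption for the example):

    #include <stdio.h>

    /* Believed stock defaults of the driver (pages): 6, 8, 16 and 64 MiB. */
    static int lowmem_adj[4]     = { 0, 1, 6, 12 };
    static int lowmem_minfree[4] = { 3 * 512, 2 * 1024, 4 * 1024, 16 * 1024 };

    /* Mirrors the threshold scan in lowmem_shrink() below: the first level
     * that BOTH counters are under selects the minimum oom_adj to kill. */
    static int pick_min_adj(int other_free, int other_file)
    {
        int i;

        for (i = 0; i < 4; i++)
            if (other_free < lowmem_minfree[i] &&
                other_file < lowmem_minfree[i])
                return lowmem_adj[i];
        return 16;    /* no threshold hit: nothing is eligible */
    }

    int main(void)
    {
        /* roughly 13.7 MiB free and 9.8 MiB of file cache */
        printf("min_adj = %d\n", pick_min_adj(3500, 2500));  /* prints 6 */
        return 0;
    }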
* */ - if (lowmem_deathpending) + if (lowmem_deathpending && + time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; + get_free_ram(&other_free, &other_file); + + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) { + lowmem_notify_killzone_approach(); + } + if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_file < lowmem_minfree[i]) { + if (other_free < lowmem_minfree[i] && + other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } @@ -178,18 +264,13 @@ static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask) } if (selected) { - spin_lock_irqsave(&lowmem_deathpending_lock, flags); - if (!lowmem_deathpending) { - lowmem_print(1, - "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); - lowmem_deathpending = selected; - task_free_register(&task_nb); - force_sig(SIGKILL, selected); - rem -= selected_tasksize; - } - spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); + lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", + selected->pid, selected->comm, + selected_oom_adj, selected_tasksize); + lowmem_deathpending = selected; + lowmem_deathpending_timeout = jiffies + HZ; + force_sig(SIGKILL, selected); + rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %d, %x, return %d\n", nr_to_scan, gfp_mask, rem); @@ -202,15 +283,93 @@ static struct shrinker lowmem_shrinker = { .seeks = DEFAULT_SEEKS * 16 }; +static void lowmem_notify_killzone_approach(void) +{ + lowmem_print(3, "notification trigger activated\n"); + sysfs_notify(lowmem_kobj, NULL, "notify_trigger_active"); +} + +static ssize_t lowmem_notify_trigger_active_show(struct kobject *k, + struct kobj_attribute *attr, char *buf) +{ + int other_free, other_file; + get_free_ram(&other_free, &other_file); + if (other_free < lowmem_minfree_notif_trigger && + other_file < lowmem_minfree_notif_trigger) + return snprintf(buf, 3, "1\n"); + else + return snprintf(buf, 3, "0\n"); +} + +static struct kobj_attribute lowmem_notify_trigger_active_attr = + __ATTR(notify_trigger_active, S_IRUGO, + lowmem_notify_trigger_active_show, NULL); + +static struct attribute *lowmem_default_attrs[] = { + &lowmem_notify_trigger_active_attr.attr, + NULL, +}; + +static ssize_t lowmem_show(struct kobject *k, struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(k, kobj_attr, buf); +} + +static const struct sysfs_ops lowmem_ops = { + .show = lowmem_show, +}; + +static void lowmem_kobj_release(struct kobject *kobj) +{ + /* Nothing to be done here */ +} + +static struct kobj_type lowmem_kobj_type = { + .release = lowmem_kobj_release, + .sysfs_ops = &lowmem_ops, + .default_attrs = lowmem_default_attrs, +}; + static int __init lowmem_init(void) { + int rc; + task_free_register(&task_nb); register_shrinker(&lowmem_shrinker); +#ifdef CONFIG_MEMORY_HOTPLUG + hotplug_memory_notifier(lmk_hotplug_callback, 0); +#endif + + lowmem_kobj = kzalloc(sizeof(*lowmem_kobj), GFP_KERNEL); + if (!lowmem_kobj) { + rc = -ENOMEM; + goto err; + } + + rc = kobject_init_and_add(lowmem_kobj, &lowmem_kobj_type, + mm_kobj, "lowmemkiller"); + if (rc) + goto err_kobj; + return 0; + +err_kobj: + kfree(lowmem_kobj); + +err: + unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); + + return rc; } static 
void __exit lowmem_exit(void) { + kobject_put(lowmem_kobj); + kfree(lowmem_kobj); unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); } module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); @@ -219,7 +378,12 @@ module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size, module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); +module_param_named(notify_trigger, lowmem_minfree_notif_trigger, uint, + S_IRUGO | S_IWUSR); +#ifdef CONFIG_SWAP +module_param_named(fudgeswap, fudgeswap, int, S_IRUGO | S_IWUSR); +#endif module_init(lowmem_init); module_exit(lowmem_exit); diff --git a/drivers/staging/snappy/Kconfig b/drivers/staging/snappy/Kconfig new file mode 100755 index 00000000..24f69085 --- /dev/null +++ b/drivers/staging/snappy/Kconfig @@ -0,0 +1,5 @@ +config SNAPPY_COMPRESS + tristate "Google Snappy Compression" + +config SNAPPY_DECOMPRESS + tristate "Google Snappy Decompression" diff --git a/drivers/staging/snappy/Makefile b/drivers/staging/snappy/Makefile new file mode 100755 index 00000000..399d070a --- /dev/null +++ b/drivers/staging/snappy/Makefile @@ -0,0 +1,5 @@ +snappy_compress-objs := csnappy_compress.o +snappy_decompress-objs := csnappy_decompress.o + +obj-$(CONFIG_SNAPPY_COMPRESS) += csnappy_compress.o +obj-$(CONFIG_SNAPPY_DECOMPRESS) += csnappy_decompress.o diff --git a/drivers/staging/snappy/csnappy.h b/drivers/staging/snappy/csnappy.h new file mode 100755 index 00000000..1e0a54ea --- /dev/null +++ b/drivers/staging/snappy/csnappy.h @@ -0,0 +1,125 @@ +#ifndef __CSNAPPY_H__ +#define __CSNAPPY_H__ +/* +File modified for the Linux Kernel by +Zeev Tarantov +*/ +#ifdef __cplusplus +extern "C" { +#endif + +#define CSNAPPY_VERSION 4 + +#define CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO 15 +#define CSNAPPY_WORKMEM_BYTES (1 << CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO) + +/* + * Returns the maximal size of the compressed representation of + * input data that is "source_len" bytes in length; + */ +uint32_t +csnappy_max_compressed_length(uint32_t source_len) __attribute__((const)); + +/* + * Flat array compression that does not emit the "uncompressed length" + * prefix. Compresses "input" array to the "output" array. + * + * REQUIRES: "input" is at most 32KiB long. + * REQUIRES: "output" points to an array of memory that is at least + * "csnappy_max_compressed_length(input_length)" in size. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Returns an "end" pointer into "output" buffer. + * "end - output" is the compressed size of "input". + */ +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_length, + char *output, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * REQUIRES: "compressed" must point to an area of memory that is at + * least "csnappy_max_compressed_length(input_length)" bytes in length. + * REQUIRES: working_memory has (1 << workmem_bytes_power_of_two) bytes. + * REQUIRES: 9 <= workmem_bytes_power_of_two <= 15. + * + * Takes the data stored in "input[0..input_length]" and stores + * it in the array pointed to by "compressed". + * + * "*out_compressed_length" is set to the length of the compressed output. 
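A plausible kernel-side caller of the csnappy_compress() entry point declared next would size the destination with csnappy_max_compressed_length() and pass a full-size work buffer, roughly as in this sketch (hypothetical helper, error handling trimmed):

    #include <linux/slab.h>
    #include <linux/errno.h>
    #include "csnappy.h"

    /* Hypothetical example: compress src[0..src_len) into a freshly
     * allocated buffer, returning the buffer and its compressed length. */
    static int example_compress(const char *src, uint32_t src_len,
                                char **out, uint32_t *out_len)
    {
        void *workmem = kmalloc(CSNAPPY_WORKMEM_BYTES, GFP_KERNEL);
        char *dst = kmalloc(csnappy_max_compressed_length(src_len), GFP_KERNEL);

        if (!workmem || !dst) {
            kfree(workmem);
            kfree(dst);
            return -ENOMEM;
        }
        csnappy_compress(src, src_len, dst, out_len, workmem,
                         CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
        kfree(workmem);
        *out = dst;
        return 0;
    }

The zram glue later in this patch takes the shortcut of calling csnappy_compress_fragment() directly, since a page always fits in a single block.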
+ */ +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *out_compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two); + +/* + * Reads header of compressed data to get stored length of uncompressed data. + * REQUIRES: start points to compressed data. + * REQUIRES: n is length of available compressed data. + * + * Returns SNAPPY_E_HEADER_BAD on error. + * Returns number of bytes read from input on success. + * Stores decoded length into *result. + */ +int +csnappy_get_uncompressed_length( + const char *start, + uint32_t n, + uint32_t *result); + +/* + * Safely decompresses all data from array "src" of length "src_len" containing + * entire compressed stream (with header) into array "dst" of size "dst_len". + * REQUIRES: dst_len is at least csnappy_get_uncompressed_length(...). + * + * Iff sucessful, returns CSNAPPY_E_OK. + * If recorded length in header is greater than dst_len, returns + * CSNAPPY_E_OUTPUT_INSUF. + * If compressed data is malformed, does not write more than dst_len into dst. + */ +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len); + +/* + * Safely decompresses stream src_len bytes long read from src to dst. + * Amount of available space at dst must be provided in *dst_len by caller. + * If compressed stream needs more space, it will not overflow and return + * CSNAPPY_E_OUTPUT_OVERRUN. + * On success, sets *dst_len to actal number of bytes decompressed. + * Iff sucessful, returns CSNAPPY_E_OK. + */ +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_len, + char *dst, + uint32_t *dst_len); + +/* + * Return values (< 0 = Error) + */ +#define CSNAPPY_E_OK 0 +#define CSNAPPY_E_HEADER_BAD (-1) +#define CSNAPPY_E_OUTPUT_INSUF (-2) +#define CSNAPPY_E_OUTPUT_OVERRUN (-3) +#define CSNAPPY_E_INPUT_NOT_CONSUMED (-4) +#define CSNAPPY_E_DATA_MALFORMED (-5) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/staging/snappy/csnappy_compress.c b/drivers/staging/snappy/csnappy_compress.c new file mode 100755 index 00000000..7344f772 --- /dev/null +++ b/drivers/staging/snappy/csnappy_compress.c @@ -0,0 +1,497 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +static inline char* +encode_varint32(char *sptr, uint32_t v) +{ + uint8_t* ptr = (uint8_t *)sptr; + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return (char *)ptr; +} + + +/* + * Any hash function will produce a valid compressed bitstream, but a good + * hash function reduces the number of collisions and thus yields better + * compression for compressible input, and more speed for incompressible + * input. Of course, it doesn't hurt if the hash function is reasonably fast + * either, as it gets called a lot. + */ +static inline uint32_t HashBytes(uint32_t bytes, int shift) +{ + uint32_t kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32_t Hash(const char *p, int shift) +{ + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + + +/* + * *** DO NOT CHANGE THE VALUE OF kBlockSize *** + + * New Compression code chops up the input into blocks of at most + * the following size. This ensures that back-references in the + * output never cross kBlockSize block boundaries. This can be + * helpful in implementing blocked decompression. However the + * decompression code should not rely on this guarantee since older + * compression code may not obey it. + */ +#define kBlockLog 15 +#define kBlockSize (1 << kBlockLog) + + +/* + * Return the largest n such that + * + * s1[0,n-1] == s2[0,n-1] + * and n <= (s2_limit - s2). + * + * Does not read *s2_limit or beyond. + * Does not read *(s1 + (s2_limit - s2)) or beyond. + * Requires that s2_limit >= s2. + * + * Separate implementation for x86_64, for speed. Uses the fact that + * x86_64 is little endian. + */ +#if defined(__x86_64__) +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + uint64_t x; + int matched, matching_bits; + DCHECK_GE(s2_limit, s2); + matched = 0; + /* + * Find out how long the match is. We loop over the data 64 bits at a + * time until we find a 64-bit block that doesn't match; then we find + * the first non-matching bit and use that to calculate the total + * length of the match. + */ + while (likely(s2 <= s2_limit - 8)) { + if (unlikely(UNALIGNED_LOAD64(s1 + matched) == + UNALIGNED_LOAD64(s2))) { + s2 += 8; + matched += 8; + } else { + /* + * On current (mid-2008) Opteron models there is a 3% + * more efficient code sequence to find the first + * non-matching byte. 
However, what follows is ~10% + * better on Intel Core 2 and newer, and we expect AMD's + * bsf instruction to improve. + */ + x = UNALIGNED_LOAD64(s1 + matched) ^ + UNALIGNED_LOAD64(s2); + matching_bits = FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (likely(s2 < s2_limit)) { + if (likely(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else /* !defined(__x86_64__) */ +static inline int +FindMatchLength(const char *s1, const char *s2, const char *s2_limit) +{ + /* Implementation based on the x86-64 version, above. */ + int matched = 0; + DCHECK_GE(s2_limit, s2); + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } +#if defined(__LITTLE_ENDIAN) + if (s2 <= s2_limit - 4) { + uint32_t x = UNALIGNED_LOAD32(s1 + matched) ^ + UNALIGNED_LOAD32(s2); + int matching_bits = FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } +#else + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } +#endif + return matched; +} +#endif /* !defined(__x86_64__) */ + + +static inline char* +EmitLiteral(char *op, const char *literal, int len, int allow_fast_path) +{ + int n = len - 1; /* Zero-length literals are disallowed */ + if (n < 60) { + /* Fits in tag byte */ + *op++ = LITERAL | (n << 2); + /* + The vast majority of copies are below 16 bytes, for which a + call to memcpy is overkill. This fast path can sometimes + copy up to 15 bytes too much, but that is okay in the + main loop, since we have a bit to go on for both sides: + - The input will always have kInputMarginBytes = 15 extra + available bytes, as long as we're in the main loop, and + if not, allow_fast_path = false. + - The output will always have 32 spare bytes (see + snappy_max_compressed_length). + */ + if (allow_fast_path && len <= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal)); + UNALIGNED_STORE64(op + 8, + UNALIGNED_LOAD64(literal + 8)); + return op + len; + } + } else { + /* Encode in upcoming bytes */ + char *base = op; + int count = 0; + op++; + while (n > 0) { + *op++ = n & 0xff; + n >>= 8; + count++; + } + DCHECK_GE(count, 1); + DCHECK_LE(count, 4); + *base = LITERAL | ((59+count) << 2); + } + memcpy(op, literal, len); + return op + len; +} + +static inline char* +EmitCopyLessThan64(char *op, int offset, int len) +{ + DCHECK_LE(len, 64); + DCHECK_GE(len, 4); + DCHECK_LT(offset, 65536); + + if ((len < 12) && (offset < 2048)) { + int len_minus_4 = len - 4; + DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ + *op++ = COPY_1_BYTE_OFFSET | + ((len_minus_4) << 2) | + ((offset >> 8) << 5); + *op++ = offset & 0xff; + } else { + *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); + put_unaligned_le16(offset, op); + op += 2; + } + return op; +} + +static inline char* +EmitCopy(char *op, int offset, int len) +{ + /* Emit 64 byte copies but make sure to keep at least four bytes + * reserved */ + while (len >= 68) { + op = EmitCopyLessThan64(op, offset, 64); + len -= 64; + } + + /* Emit an extra 60 byte copy if have too much data to fit in one + * copy */ + if (len > 64) { + op = EmitCopyLessThan64(op, offset, 60); + len -= 60; + } + + /* Emit remainder */ + op = EmitCopyLessThan64(op, offset, len); + return op; +} + + +/* + * For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will + * equal UNALIGNED_LOAD32(p + offset). 
Motivation: On x86-64 hardware we have + * empirically found that overlapping loads such as + * UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) + * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32_t. + */ +static inline uint32_t +GetUint32AtOffset(uint64_t v, int offset) +{ + DCHECK(0 <= offset && offset <= 4); +#ifdef __LITTLE_ENDIAN + return v >> (8 * offset); +#else + return v >> (32 - 8 * offset); +#endif +} + +#define kInputMarginBytes 15 +char* +csnappy_compress_fragment( + const char *input, + const uint32_t input_size, + char *op, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + const char *ip, *ip_end, *base_ip, *next_emit, *ip_limit, *next_ip, + *candidate, *base; + uint16_t *table = (uint16_t *)working_memory; + uint64_t input_bytes; + uint32_t hash, next_hash, prev_hash, cur_hash, skip, candidate_bytes; + int shift, matched; + + DCHECK_GE(workmem_bytes_power_of_two, 9); + DCHECK_LE(workmem_bytes_power_of_two, 15); + /* Table of 2^X bytes, need (X-1) bits to address table of uint16_t. + * How many bits of 32bit hash function result are discarded? */ + shift = 33 - workmem_bytes_power_of_two; + /* "ip" is the input pointer, and "op" is the output pointer. */ + ip = input; + DCHECK_LE(input_size, kBlockSize); + ip_end = input + input_size; + base_ip = ip; + /* Bytes in [next_emit, ip) will be emitted as literal bytes. Or + [next_emit, ip_end) after the main loop. */ + next_emit = ip; + + if (unlikely(input_size < kInputMarginBytes)) + goto emit_remainder; + + memset(working_memory, 0, 1 << workmem_bytes_power_of_two); + + ip_limit = input + input_size - kInputMarginBytes; + next_hash = Hash(++ip, shift); + +main_loop: + DCHECK_LT(next_emit, ip); + /* + * The body of this loop calls EmitLiteral once and then EmitCopy one or + * more times. (The exception is that when we're close to exhausting + * the input we goto emit_remainder.) + * + * In the first iteration of this loop we're just starting, so + * there's nothing to copy, so calling EmitLiteral once is + * necessary. And we only start a new iteration when the + * current iteration has determined that a call to EmitLiteral will + * precede the next call to EmitCopy (if any). + * + * Step 1: Scan forward in the input looking for a 4-byte-long match. + * If we get close to exhausting the input then goto emit_remainder. + * + * Heuristic match skipping: If 32 bytes are scanned with no matches + * found, start looking only at every other byte. If 32 more bytes are + * scanned, look at every third byte, etc.. When a match is found, + * immediately go back to looking at every byte. This is a small loss + * (~5% performance, ~0.1% density) for compressible data due to more + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge + * win since the compressor quickly "realizes" the data is incompressible + * and doesn't bother looking for matches everywhere. + * + * The "skip" variable keeps track of how many bytes there are since the + * last match; dividing it by 32 (ie. right-shifting by five) gives the + * number of bytes to move ahead for each iteration. 
+ */ + skip = 32; + + next_ip = ip; + do { + ip = next_ip; + hash = next_hash; + DCHECK_EQ(hash, Hash(ip, shift)); + next_ip = ip + (skip++ >> 5); + if (unlikely(next_ip > ip_limit)) + goto emit_remainder; + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + DCHECK_GE(candidate, base_ip); + DCHECK_LT(candidate, ip); + + table[hash] = ip - base_ip; + } while (likely(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + + /* + * Step 2: A 4-byte match has been found. We'll later see if more + * than 4 bytes match. But, prior to the match, input + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + */ + DCHECK_LE(next_emit + 16, ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit, 1); + + /* + * Step 3: Call EmitCopy, and then see if another EmitCopy could + * be our next move. Repeat until we find no match for the + * input immediately after what was consumed by the last EmitCopy call. + * + * If we exit this loop normally then we need to call EmitLiteral next, + * though we don't yet know how big the literal will be. We handle that + * by proceeding to the next iteration of the main loop. We also can exit + * this loop via goto if we get close to exhausting the input. + */ + input_bytes = 0; + candidate_bytes = 0; + + do { + /* We have a 4-byte match at ip, and no need to emit any + "literal bytes" prior to ip. */ + base = ip; + matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); + ip += matched; + DCHECK_EQ(0, memcmp(base, candidate, matched)); + op = EmitCopy(op, base - candidate, matched); + /* We could immediately start working at ip now, but to improve + compression we first update table[Hash(ip - 1, ...)]. */ + next_emit = ip; + if (unlikely(ip >= ip_limit)) + goto emit_remainder; + input_bytes = UNALIGNED_LOAD64(ip - 1); + prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + goto main_loop; + +emit_remainder: + /* Emit the remaining bytes as a literal */ + if (next_emit < ip_end) + op = EmitLiteral(op, next_emit, ip_end - next_emit, 0); + + return op; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress_fragment); +#endif + +uint32_t __attribute__((const)) +csnappy_max_compressed_length(uint32_t source_len) +{ + return 32 + source_len + source_len/6; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_max_compressed_length); +#endif + +void +csnappy_compress( + const char *input, + uint32_t input_length, + char *compressed, + uint32_t *compressed_length, + void *working_memory, + const int workmem_bytes_power_of_two) +{ + int workmem_size; + int num_to_read; + uint32_t written = 0; + char *p = encode_varint32(compressed, input_length); + written += (p - compressed); + compressed = p; + while (input_length > 0) { + num_to_read = min(input_length, (uint32_t)kBlockSize); + workmem_size = workmem_bytes_power_of_two; + if (num_to_read < kBlockSize) { + for (workmem_size = 9; + workmem_size < workmem_bytes_power_of_two; + ++workmem_size) { + if ((1 << (workmem_size-1)) >= num_to_read) + break; + } + } + p = csnappy_compress_fragment( + input, num_to_read, compressed, + working_memory, workmem_size); + 
written += (p - compressed); + compressed = p; + input_length -= num_to_read; + input += num_to_read; + } + *compressed_length = written; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_compress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Compressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_decompress.c b/drivers/staging/snappy/csnappy_decompress.c new file mode 100755 index 00000000..d05d8173 --- /dev/null +++ b/drivers/staging/snappy/csnappy_decompress.c @@ -0,0 +1,321 @@ +/* +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#include "csnappy_internal.h" +#ifdef __KERNEL__ +#include +#include +#endif +#include "csnappy.h" + + +/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ +static const uint32_t wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +/* + * Data stored per entry in lookup table: + * Range Bits-used Description + * ------------------------------------ + * 1..64 0..7 Literal/copy length encoded in opcode byte + * 0..7 8..10 Copy offset encoded in opcode byte / 256 + * 0..4 11..13 Extra bytes after opcode + * + * We use eight bits for the length even though 7 would have sufficed + * because of efficiency reasons: + * (1) Extracting a byte is faster than a bit-field + * (2) It properly aligns copy offset so we do not need a <<8 + */ +static const uint16_t char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +/* + * Copy "len" bytes from "src" to "op", one byte at a time. Used for + * handling COPY operations where the input and output regions may + * overlap. For example, suppose: + * src == "ab" + * op == src + 2 + * len == 20 + * After IncrementalCopy(src, op, len), the result will have + * eleven copies of "ab" + * ababababababababababab + * Note that this does not match the semantics of either memcpy() + * or memmove(). 
+ */ +static inline void IncrementalCopy(const char *src, char *op, int len) +{ + DCHECK_GT(len, 0); + do { + *op++ = *src++; + } while (--len > 0); +} + +/* + * Equivalent to IncrementalCopy except that it can write up to ten extra + * bytes after the end of the copy, and that it is faster. + * + * The main part of this loop is a simple copy of eight bytes at a time until + * we've copied (at least) the requested amount of bytes. However, if op and + * src are less than eight bytes apart (indicating a repeating pattern of + * length < 8), we first need to expand the pattern in order to get the correct + * results. For instance, if the buffer looks like this, with the eight-byte + * <src> and <op> patterns marked as intervals: + * + * abxxxxxxxxxxxx + * [------] src + * [------] op + * + * a single eight-byte copy from <src> to <op> will repeat the pattern once, + * after which we can move <op> two bytes without moving <src>: + * + * ababxxxxxxxxxx + * [------] src + * [------] op + * + * and repeat the exercise until the two no longer overlap. + * + * This allows us to do very well in the special case of one single byte + * repeated many times, without taking a big hit for more general cases. + * + * The worst case of extra writing past the end of the match occurs when + * op - src == 1 and len == 1; the last copy will read from byte positions + * [0..7] and write to [4..11], whereas it was only supposed to write to + * position 1. Thus, ten excess bytes. + */ +static const int kMaxIncrementCopyOverflow = 10; +static inline void IncrementalCopyFastPath(const char *src, char *op, int len) +{ + while (op - src < 8) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + len -= op - src; + op += op - src; + } + while (len > 0) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + src += 8; + op += 8; + len -= 8; + } +} + + +/* A type that writes to a flat array. */ +struct SnappyArrayWriter { + char *base; + char *op; + char *op_limit; +}; + +static inline int +SAW__Append(struct SnappyArrayWriter *this, + const char *ip, uint32_t len, int allow_fast_path) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /*Fast path, used for the majority (about 90%) of dynamic invocations.*/ + if (allow_fast_path && len <= 16 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + memcpy(op, ip, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + +static inline int +SAW__AppendFromSelf(struct SnappyArrayWriter *this, + uint32_t offset, uint32_t len) +{ + char *op = this->op; + const int space_left = this->op_limit - op; + /* -1u catches offset==0 */ + if (op - this->base <= offset - 1u) + return CSNAPPY_E_DATA_MALFORMED; + /* Fast path, used for the majority (70-80%) of dynamic invocations.
*/ + if (len <= 16 && offset >= 8 && space_left >= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); + } else if (space_left >= len + kMaxIncrementCopyOverflow) { + IncrementalCopyFastPath(op - offset, op, len); + } else { + if (space_left < len) + return CSNAPPY_E_OUTPUT_OVERRUN; + IncrementalCopy(op - offset, op, len); + } + this->op = op + len; + return CSNAPPY_E_OK; +} + + +int +csnappy_get_uncompressed_length( + const char *src, + uint32_t src_len, + uint32_t *result) +{ + const char *src_base = src; + uint32_t shift = 0; + uint8_t c; + /* Length is encoded in 1..5 bytes */ + *result = 0; + for (;;) { + if (shift >= 32) + goto err_out; + if (src_len == 0) + goto err_out; + c = *(const uint8_t *)src++; + src_len -= 1; + *result |= (uint32_t)(c & 0x7f) << shift; + if (c < 128) + break; + shift += 7; + } + return src - src_base; +err_out: + return CSNAPPY_E_HEADER_BAD; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_get_uncompressed_length); +#endif + +int +csnappy_decompress_noheader( + const char *src, + uint32_t src_remaining, + char *dst, + uint32_t *dst_len) +{ + struct SnappyArrayWriter writer; + uint32_t length, trailer, opword, extra_bytes; + int ret; + uint8_t opcode; + char scratch[5]; + writer.op = writer.base = dst; + writer.op_limit = writer.op + *dst_len; + while (src_remaining) { + if (unlikely(src_remaining < 5)) { + memcpy(scratch, src, src_remaining); + src = scratch; + } + opcode = *(const uint8_t *)src++; + opword = char_table[opcode]; + extra_bytes = opword >> 11; + trailer = get_unaligned_le32(src) & wordmask[extra_bytes]; + src += extra_bytes; + src_remaining -= 1 + extra_bytes; + length = opword & 0xff; + if (opcode & 0x3) { + trailer += opword & 0x700; + ret = SAW__AppendFromSelf(&writer, trailer, length); + if (ret < 0) + return ret; + } else { + length += trailer; + if (unlikely(src_remaining < length)) + return CSNAPPY_E_DATA_MALFORMED; + ret = src_remaining >= 16; + ret = SAW__Append(&writer, src, length, ret); + if (ret < 0) + return ret; + src += length; + src_remaining -= length; + } + } + *dst_len = writer.op - writer.base; + return CSNAPPY_E_OK; +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress_noheader); +#endif + +int +csnappy_decompress( + const char *src, + uint32_t src_len, + char *dst, + uint32_t dst_len) +{ + int n; + uint32_t olen = 0; + /* Read uncompressed length from the front of the compressed input */ + n = csnappy_get_uncompressed_length(src, src_len, &olen); + if (unlikely(n < CSNAPPY_E_OK)) + return n; + /* Protect against possible DoS attack */ + if (unlikely(olen > dst_len)) + return CSNAPPY_E_OUTPUT_INSUF; + return csnappy_decompress_noheader(src + n, src_len - n, dst, &olen); +} +#if defined(__KERNEL__) && !defined(STATIC) +EXPORT_SYMBOL(csnappy_decompress); + +MODULE_LICENSE("BSD"); +MODULE_DESCRIPTION("Snappy Decompressor"); +#endif diff --git a/drivers/staging/snappy/csnappy_internal.h b/drivers/staging/snappy/csnappy_internal.h new file mode 100755 index 00000000..6f1a5465 --- /dev/null +++ b/drivers/staging/snappy/csnappy_internal.h @@ -0,0 +1,83 @@ +/* +Copyright 2011 Google Inc. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Various stubs for the open-source version of Snappy. + +File modified for the Linux Kernel by +Zeev Tarantov +*/ + +#ifndef CSNAPPY_INTERNAL_H_ +#define CSNAPPY_INTERNAL_H_ + +#ifndef __KERNEL__ +#include "csnappy_internal_userspace.h" +#else + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DCHECK(cond) if (!(cond)) \ + printk(KERN_DEBUG "assert failed @ %s:%i\n", \ + __FILE__, __LINE__) +#else +#define DCHECK(cond) +#endif + +#define UNALIGNED_LOAD16(_p) get_unaligned((const uint16_t *)(_p)) +#define UNALIGNED_LOAD32(_p) get_unaligned((const uint32_t *)(_p)) +#define UNALIGNED_LOAD64(_p) get_unaligned((const uint64_t *)(_p)) +#define UNALIGNED_STORE16(_p, _val) put_unaligned((_val), (uint16_t *)(_p)) +#define UNALIGNED_STORE32(_p, _val) put_unaligned((_val), (uint32_t *)(_p)) +#define UNALIGNED_STORE64(_p, _val) put_unaligned((_val), (uint64_t *)(_p)) + +#define FindLSBSetNonZero(n) __builtin_ctz(n) +#define FindLSBSetNonZero64(n) __builtin_ctzll(n) + +#endif /* __KERNEL__ */ + +#define DCHECK_EQ(a, b) DCHECK(((a) == (b))) +#define DCHECK_NE(a, b) DCHECK(((a) != (b))) +#define DCHECK_GT(a, b) DCHECK(((a) > (b))) +#define DCHECK_GE(a, b) DCHECK(((a) >= (b))) +#define DCHECK_LT(a, b) DCHECK(((a) < (b))) +#define DCHECK_LE(a, b) DCHECK(((a) <= (b))) + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; + +#endif /* CSNAPPY_INTERNAL_H_ */ diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig old mode 100644 new mode 100755 index 18c1a971..24027b43 --- a/drivers/staging/zram/Kconfig +++ b/drivers/staging/zram/Kconfig @@ -6,8 +6,6 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). @@ -21,23 +19,6 @@ config ZRAM See zram.txt for more information. Project home: http://compcache.googlecode.com/ -config ZRAM_NUM_DEVICES - int "Default number of zram devices" - depends on ZRAM - range 1 32 - default 1 - help - Select default number of zram devices. You can override this value - using 'num_devices' module parameter. 
- -config ZRAM_DEFAULT_PERCENTAGE - int "Default number of zram percentage" - depends on ZRAM - range 10 80 - default 25 - help - Select default zram disk size: percentage of total RAM - config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM @@ -45,11 +26,15 @@ config ZRAM_DEBUG help This option adds additional debugging code to the compressed RAM block device driver. +config ZRAM_LZO + bool "LZO compression" + default y + depends on ZRAM + select LZO_COMPRESS + select LZO_DECOMPRESS -config ZRAM_DEFAULT_DISKSIZE - int "Default size of zram in bytes" - depends on ZRAM - default 100663296 - help - Set default zram disk size (default ~ 96MB) - +config ZRAM_SNAPPY + bool "Snappy compression" + depends on ZRAM + select SNAPPY_COMPRESS + select SNAPPY_DECOMPRESS diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc.h b/drivers/staging/zram/xvmalloc.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt old mode 100644 new mode 100755 diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c old mode 100644 new mode 100755 index fc4c2e6f..c61261ac --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -29,12 +29,90 @@ #include #include #include -#include #include #include #include "zram_drv.h" +#if defined(CONFIG_ZRAM_LZO) +#include +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor lzo_compressor = { + .name = "LZO", + .workmem_bytes = LZO1X_MEM_COMPRESS, + .compress = &lzo1x_1_compress, + .decompress = &lzo1x_decompress_safe +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE LZO1X_MEM_COMPRESS +#define COMPRESS(s, sl, d, dl, wm) \ + lzo1x_1_compress(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + lzo1x_decompress_safe(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_LZO) */ + +#if defined(CONFIG_ZRAM_SNAPPY) +#include "../snappy/csnappy.h" /* if built in drivers/staging */ +#define WMSIZE_ORDER ((PAGE_SHIFT > 14) ? 
(15) : (PAGE_SHIFT+1)) +static int +snappy_compress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem) +{ + const unsigned char *end = csnappy_compress_fragment( + src, (uint32_t)src_len, dst, workmem, WMSIZE_ORDER); + *dst_len = end - dst; + return 0; +} +static int +snappy_decompress_( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len) +{ + uint32_t dst_len_ = (uint32_t)*dst_len; + int ret = csnappy_decompress_noheader(src, src_len, dst, &dst_len_); + *dst_len = (size_t)dst_len_; + return ret; +} +#ifdef MULTIPLE_COMPRESSORS +static const struct zram_compressor snappy_compressor = { + .name = "SNAPPY", + .workmem_bytes = (1 << WMSIZE_ORDER), + .compress = &snappy_compress_, + .decompress = &snappy_decompress_ +}; +#else /* !MULTIPLE_COMPRESSORS */ +#define WMSIZE (1 << WMSIZE_ORDER) +#define COMPRESS(s, sl, d, dl, wm) \ + snappy_compress_(s, sl, d, dl, wm) +#define DECOMPRESS(s, sl, d, dl) \ + snappy_decompress_(s, sl, d, dl) +#endif /* !MULTIPLE_COMPRESSORS */ +#endif /* defined(CONFIG_ZRAM_SNAPPY) */ + +#ifdef MULTIPLE_COMPRESSORS +const struct zram_compressor * const zram_compressors[] = { +#if defined(CONFIG_ZRAM_LZO) + &lzo_compressor, +#endif +#if defined(CONFIG_ZRAM_SNAPPY) + &snappy_compressor, +#endif + NULL +}; +#define WMSIZE (zram->compressor->workmem_bytes) +#define COMPRESS(s, sl, d, dl, wm) \ + (zram->compressor->compress(s, sl, d, dl, wm)) +#define DECOMPRESS(s, sl, d, dl) \ + (zram->compressor->decompress(s, sl, d, dl)) +#endif /* MULTIPLE_COMPRESSORS */ + /* Globals */ static int zram_major; struct zram *zram_devices; @@ -104,19 +182,33 @@ static int page_zero_filled(void *ptr) return 1; } -static u64 zram_default_disksize_bytes(void) +static void zram_set_disksize(struct zram *zram, size_t totalram_bytes) { -#if 0 - return ((totalram_pages << PAGE_SHIFT) * - default_disksize_perc_ram / 100) & PAGE_MASK; -#endif - return CONFIG_ZRAM_DEFAULT_DISKSIZE; -} + if (!zram->disksize) { + pr_info( + "disk size not provided. You can use disksize_kb module " + "param to specify size.\nUsing default: (%u%% of RAM).\n", + default_disksize_perc_ram + ); + zram->disksize = default_disksize_perc_ram * + (totalram_bytes / 100); + } -static void zram_set_disksize(struct zram *zram, u64 size_bytes) -{ - zram->disksize = size_bytes; - set_capacity(zram->disk, size_bytes >> SECTOR_SHIFT); + if (zram->disksize > 2 * (totalram_bytes)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %zu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + totalram_bytes >> 10, zram->disksize + ); + } + + zram->disksize &= PAGE_MASK; } static void zram_free_page(struct zram *zram, size_t index) @@ -243,7 +335,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, cmem = kmap_atomic(zram->table[index].page, KM_USER1) + zram->table[index].offset; - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), uncmem, &clen); @@ -257,7 +349,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, kunmap_atomic(user_mem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -291,13 +383,13 @@ static int zram_read_before_write(struct zram *zram, char *mem, u32 index) return 0; } - ret = lzo1x_decompress_safe(cmem + sizeof(*zheader), + ret = DECOMPRESS(cmem + sizeof(*zheader), xv_get_object_size(cmem) - sizeof(*zheader), mem, &clen); kunmap_atomic(cmem, KM_USER0); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); zram_stat64_inc(zram, &zram->stats.failed_reads); return ret; @@ -363,18 +455,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + COMPRESS(uncmem, PAGE_SIZE, src, &clen, zram->compress_workmem); kunmap_atomic(user_mem, KM_USER0); if (is_partial_io(bvec)) kfree(uncmem); - if (unlikely(ret != LZO_E_OK)) { - pr_err("Compression failed! err=%d\n", ret); - goto out; - } - /* * Page is incompressible. Store it as-is (uncompressed) * since we do not want to return too many disk write @@ -546,27 +633,35 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; + if (unlikely(!zram->init_done) && zram_init_device(zram)) + goto error; + + down_read(&zram->init_lock); + if (unlikely(!zram->init_done)) + goto error_unlock; + if (!valid_io_request(zram, bio)) { zram_stat64_inc(zram, &zram->stats.invalid_io); - bio_io_error(bio); - return 0; - } - - if (unlikely(!zram->init_done) && zram_init_device(zram)) { - bio_io_error(bio); - return 0; + goto error_unlock; } __zram_make_request(zram, bio, bio_data_dir(bio)); + up_read(&zram->init_lock); return 0; + +error_unlock: + up_read(&zram->init_lock); +error: + bio_io_error(bio); + return 0; + } -void zram_reset_device(struct zram *zram) +void __zram_reset_device(struct zram *zram) { size_t index; - mutex_lock(&zram->init_lock); zram->init_done = 0; /* Free various per-device buffers */ @@ -602,8 +697,14 @@ void zram_reset_device(struct zram *zram) /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); - zram_set_disksize(zram, zram_default_disksize_bytes()); - mutex_unlock(&zram->init_lock); + zram->disksize = 0; +} + +void zram_reset_device(struct zram *zram) +{ + down_write(&zram->init_lock); + __zram_reset_device(zram); + up_write(&zram->init_lock); } int zram_init_device(struct zram *zram) @@ -611,37 +712,39 @@ int zram_init_device(struct zram *zram) int ret; size_t num_pages; - mutex_lock(&zram->init_lock); + down_write(&zram->init_lock); if (zram->init_done) { - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); return 0; } - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); + + zram->compress_workmem = kzalloc(WMSIZE, GFP_KERNEL); if (!zram->compress_workmem) { pr_err("Error allocating compressor working memory!\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } - zram->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); + zram->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (!zram->compress_buffer) { pr_err("Error allocating compressor buffer space\n"); ret = -ENOMEM; - goto fail; + goto fail_no_table; } num_pages = zram->disksize >> PAGE_SHIFT; zram->table = vmalloc(num_pages * sizeof(*zram->table)); if (!zram->table) { pr_err("Error allocating zram address table\n"); - /* To prevent accessing 
table entries during cleanup */ - zram->disksize = 0; ret = -ENOMEM; - goto fail; + goto fail_no_table; } - memset(zram->table, 0, num_pages * sizeof(*zram->table)); + memset(zram->table, 0, num_pages * sizeof(*zram->table)); + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); @@ -654,15 +757,17 @@ int zram_init_device(struct zram *zram) } zram->init_done = 1; - mutex_unlock(&zram->init_lock); + up_write(&zram->init_lock); pr_debug("Initialization done!\n"); return 0; +fail_no_table: + /* To prevent accessing table entries during cleanup */ + zram->disksize = 0; fail: - mutex_unlock(&zram->init_lock); - zram_reset_device(zram); - + __zram_reset_device(zram); + up_write(&zram->init_lock); pr_err("Initialization failed: err=%d\n", ret); return ret; } @@ -687,7 +792,7 @@ static int create_device(struct zram *zram, int device_id) int ret = 0; init_rwsem(&zram->lock); - mutex_init(&zram->init_lock); + init_rwsem(&zram->init_lock); spin_lock_init(&zram->stat64_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); @@ -718,13 +823,13 @@ static int create_device(struct zram *zram, int device_id) zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - /* - * Set some default disksize. To set another disksize, user - * must reset the device and then write a new disksize to - * corresponding device's sysfs node. - */ - zram_set_disksize(zram, zram_default_disksize_bytes()); + /* Actual capacity set using syfs (/sys/block/zram/disksize */ + set_capacity(zram->disk, 0); + /* Can be changed using sysfs (/sys/block/zram/compressor) */ +#ifdef MULTIPLE_COMPRESSORS + zram->compressor = zram_compressors[0]; +#endif /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. @@ -768,13 +873,6 @@ static int __init zram_init(void) { int ret, dev_id; - /* - * Module parameter not specified by user. Use default - * value as defined during kernel config. - */ - if (zram_num_devices == 0) - zram_num_devices = CONFIG_ZRAM_NUM_DEVICES; - if (zram_num_devices > max_num_devices) { pr_warning("Invalid value for num_devices: %u\n", zram_num_devices); @@ -789,12 +887,15 @@ static int __init zram_init(void) goto out; } + if (!zram_num_devices) { + pr_info("num_devices not specified. 
Using default: 1\n"); + zram_num_devices = 1; + } + /* Allocate the device array and initialize each one */ pr_info("Creating %u devices ...\n", zram_num_devices); - zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), - GFP_KERNEL); - if (!zram_devices) - { + zram_devices = kzalloc(zram_num_devices * sizeof(struct zram), GFP_KERNEL); + if (!zram_devices) { ret = -ENOMEM; goto unregister; } @@ -836,8 +937,8 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param_named(num_devices, zram_num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); +module_param(zram_num_devices, uint, 0); +MODULE_PARM_DESC(zram_num_devices, "Number of zram devices"); module_init(zram_init); module_exit(zram_exit); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h old mode 100644 new mode 100755 index fed0d14b..6ccb855b --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -41,7 +41,7 @@ struct zobj_header { /*-- Configurable parameters */ /* Default zram disk size: 25% of total RAM */ -static const unsigned default_disksize_perc_ram = CONFIG_ZRAM_DEFAULT_PERCENTAGE; +static const unsigned default_disksize_perc_ram = 25; /* * Pages that compress to size greater than this are stored @@ -66,6 +66,13 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) == 0 +#error At least one of CONFIG_ZRAM_LZO, CONFIG_ZRAM_SNAPPY must be defined! +#endif +#if defined(CONFIG_ZRAM_LZO) + defined(CONFIG_ZRAM_SNAPPY) > 1 +#define MULTIPLE_COMPRESSORS +#endif + /* Flags for zram pages (table[page_no].flags) */ enum zram_pageflags { /* Page is stored uncompressed */ @@ -103,6 +110,9 @@ struct zram_stats { struct zram { struct xv_pool *mem_pool; +#ifdef MULTIPLE_COMPRESSORS + const struct zram_compressor *compressor; +#endif void *compress_workmem; void *compress_buffer; struct table *table; @@ -112,8 +122,8 @@ struct zram { struct request_queue *queue; struct gendisk *disk; int init_done; - /* Prevent concurrent execution of device init and reset */ - struct mutex init_lock; + /* Prevent concurrent execution of device init, reset and R/W request */ + struct rw_semaphore init_lock; /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk. 
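The zram changes above replace the hard-wired LZO calls with a small compressor abstraction: when both CONFIG_ZRAM_LZO and CONFIG_ZRAM_SNAPPY are enabled, MULTIPLE_COMPRESSORS is defined, zram_compressors[] holds a NULL-terminated table of ops structures, and the sysfs compressor attribute walks that table by index before the device is initialised. The following self-contained C sketch shows the same pattern under assumed names (compressor_ops, pick_compressor, fake_compress are illustrative, not the driver's symbols); pick_compressor() mirrors the index walk in compressor_store().

/* Sketch of the pluggable-compressor pattern used by the patched driver. */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct compressor_ops {
	const char *name;
	size_t workmem_bytes;
	int (*compress)(const unsigned char *src, size_t src_len,
			unsigned char *dst, size_t *dst_len, void *workmem);
};

static int fake_compress(const unsigned char *src, size_t src_len,
			 unsigned char *dst, size_t *dst_len, void *workmem)
{
	(void)workmem;
	memcpy(dst, src, src_len);	/* stand-in for lzo1x_1_compress/csnappy */
	*dst_len = src_len;
	return 0;
}

static const struct compressor_ops lzo_ops = { "LZO", 16384, fake_compress };
static const struct compressor_ops snappy_ops = { "SNAPPY", 32768, fake_compress };

static const struct compressor_ops *const compressors[] = {
	&lzo_ops, &snappy_ops, NULL
};

/* Walk the table until the requested index, as compressor_store() does. */
static const struct compressor_ops *pick_compressor(unsigned int requested)
{
	unsigned int i = 0;
	const struct compressor_ops *p = compressors[i];

	while (p && i < requested)
		p = compressors[++i];
	return p;	/* NULL if the index is out of range */
}

int main(void)
{
	const struct compressor_ops *c = pick_compressor(1);

	if (c)
		printf("selected %s (workmem %zu bytes)\n", c->name, c->workmem_bytes);
	return 0;
}
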
@@ -130,7 +140,27 @@ extern struct attribute_group zram_disk_attr_group; #endif extern int zram_init_device(struct zram *zram); -extern void zram_reset_device(struct zram *zram); +extern void __zram_reset_device(struct zram *zram); + +#ifdef MULTIPLE_COMPRESSORS +struct zram_compressor { + const char *name; + int (*compress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len, + void *workmem); + int (*decompress)( + const unsigned char *src, + size_t src_len, + unsigned char *dst, + size_t *dst_len); + unsigned workmem_bytes; +}; + +extern const struct zram_compressor * const zram_compressors[]; +#endif #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c old mode 100644 new mode 100755 index d894928a..231924e0 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -55,23 +55,78 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { int ret; + u64 disksize; struct zram *zram = dev_to_zram(dev); + ret = strict_strtoull(buf, 10, &disksize); + if (ret) + return ret; + + down_write(&zram->init_lock); if (zram->init_done) { + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); return -EBUSY; } - ret = strict_strtoull(buf, 10, &zram->disksize); - if (ret) - return ret; - - zram->disksize = PAGE_ALIGN(zram->disksize); + zram->disksize = PAGE_ALIGN(disksize); set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + up_write(&zram->init_lock); return len; } +#ifdef MULTIPLE_COMPRESSORS +static ssize_t compressor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char * const buf_base = buf; + const struct zram_compressor *p, *curr; + unsigned int i = 0; + struct zram *zram = dev_to_zram(dev); + curr = zram->compressor; + p = zram_compressors[i]; + while (p) { + if (curr == p) + buf += sprintf(buf, "*"); + buf += sprintf(buf, "%u - %s\n", i, p->name); + p = zram_compressors[++i]; + } + return buf - buf_base; +} + +static ssize_t compressor_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + const struct zram_compressor *p; + unsigned long requested; + unsigned int i = 0; + int ret; + struct zram *zram = dev_to_zram(dev); + + if (zram->init_done) { + pr_info("Cannot change compressor for initialized device\n"); + return -EBUSY; + } + + ret = strict_strtoul(buf, 10, &requested); + if (ret) + return ret; + + p = zram_compressors[i]; + while (p && (i < requested)) + p = zram_compressors[++i]; + + if (!p) { + pr_info("No compressor with index #%lu\n", requested); + return -EINVAL; + } + + zram->compressor = p; + return len; +} +#endif /* MULTIPLE_COMPRESSORS */ + static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -106,8 +161,10 @@ static ssize_t reset_store(struct device *dev, if (bdev) fsync_bdev(bdev); + down_write(&zram->init_lock); if (zram->init_done) - zram_reset_device(zram); + __zram_reset_device(zram); + up_write(&zram->init_lock); return len; } @@ -188,6 +245,10 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +#ifdef MULTIPLE_COMPRESSORS +static DEVICE_ATTR(compressor, S_IRUGO | S_IWUSR, + compressor_show, compressor_store); +#endif static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); @@ -202,6 +263,9 @@ static DEVICE_ATTR(compr_data_size, S_IRUGO, 
compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static struct attribute *zram_disk_attrs[] = { +#ifdef MULTIPLE_COMPRESSORS + &dev_attr_compressor.attr, +#endif &dev_attr_disksize.attr, &dev_attr_initstate.attr, &dev_attr_reset.attr, diff --git a/drivers/video/msm/msm_fb.c b/drivers/video/msm/msm_fb.c index 82e110ce..028a1c7c 100644 --- a/drivers/video/msm/msm_fb.c +++ b/drivers/video/msm/msm_fb.c @@ -992,7 +992,7 @@ static void setup_fb_info(struct msmfb_info *msmfb) int r; /* finish setting up the fb_info struct */ - strncpy(fb_info->fix.id, "msmfb", 16); + strncpy(fb_info->fix.id, "msmfb31_0", 16); fb_info->fix.ypanstep = 1; fb_info->fbops = &msmfb_ops; diff --git a/include/linux/capability.h b/include/linux/capability.h old mode 100644 new mode 100755 index c8f2a5f7..c4f6d94d --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -357,7 +357,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/genlock.h b/include/linux/genlock.h old mode 100644 new mode 100755 index 2e9f9d68..587c49df --- a/include/linux/genlock.h +++ b/include/linux/genlock.h @@ -12,7 +12,7 @@ void genlock_put_handle(struct genlock_handle *handle); struct genlock *genlock_create_lock(struct genlock_handle *); struct genlock *genlock_attach_lock(struct genlock_handle *, int fd); int genlock_wait(struct genlock_handle *handle, u32 timeout); -void genlock_release_lock(struct genlock_handle *); +/* genlock_release_lock was deprecated */ int genlock_lock(struct genlock_handle *handle, int op, int flags, u32 timeout); #endif @@ -21,7 +21,8 @@ int genlock_lock(struct genlock_handle *handle, int op, int flags, #define GENLOCK_WRLOCK 1 #define GENLOCK_RDLOCK 2 -#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_NOBLOCK (1 << 0) +#define GENLOCK_WRITE_TO_READ (1 << 1) struct genlock_lock { int fd; @@ -37,9 +38,15 @@ struct genlock_lock { struct genlock_lock) #define GENLOCK_IOC_ATTACH _IOW(GENLOCK_IOC_MAGIC, 2, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_LOCK _IOW(GENLOCK_IOC_MAGIC, 3, \ struct genlock_lock) + +/* Deprecated */ #define GENLOCK_IOC_RELEASE _IO(GENLOCK_IOC_MAGIC, 4) #define GENLOCK_IOC_WAIT _IOW(GENLOCK_IOC_MAGIC, 5, \ struct genlock_lock) +#define GENLOCK_IOC_DREADLOCK _IOW(GENLOCK_IOC_MAGIC, 6, \ + struct genlock_lock) #endif diff --git a/include/linux/ion.h b/include/linux/ion.h new file mode 100755 index 00000000..4b7b8b7d --- /dev/null +++ b/include/linux/ion.h @@ -0,0 +1,548 @@ +/* + * include/linux/ion.h + * + * Copyright (C) 2011 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _LINUX_ION_H +#define _LINUX_ION_H + +#include +#include + + +struct ion_handle; +/** + * enum ion_heap_types - list of all possible types of heaps + * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc + * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc + * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved + * carveout heap, allocations are physically + * contiguous + * @ION_HEAP_END: helper for iterating over heaps + */ +enum ion_heap_type { + ION_HEAP_TYPE_SYSTEM, + ION_HEAP_TYPE_SYSTEM_CONTIG, + ION_HEAP_TYPE_CARVEOUT, + ION_HEAP_TYPE_CUSTOM, /* must be last so device specific heaps always + are at the end of this enum */ + ION_NUM_HEAPS, +}; + +#define ION_HEAP_SYSTEM_MASK (1 << ION_HEAP_TYPE_SYSTEM) +#define ION_HEAP_SYSTEM_CONTIG_MASK (1 << ION_HEAP_TYPE_SYSTEM_CONTIG) +#define ION_HEAP_CARVEOUT_MASK (1 << ION_HEAP_TYPE_CARVEOUT) + + +/** + * These are the only ids that should be used for Ion heap ids. + * The ids listed are the order in which allocation will be attempted + * if specified. Don't swap the order of heap ids unless you know what + * you are doing! + */ + +enum ion_heap_ids { + ION_HEAP_SYSTEM_ID, + ION_HEAP_SYSTEM_CONTIG_ID, + ION_HEAP_EBI_ID, + ION_HEAP_SMI_ID, + ION_HEAP_ADSP_ID, + ION_HEAP_AUDIO_ID, +}; + +#define ION_KMALLOC_HEAP_NAME "kmalloc" +#define ION_VMALLOC_HEAP_NAME "vmalloc" +#define ION_EBI1_HEAP_NAME "EBI1" +#define ION_ADSP_HEAP_NAME "adsp" +#define ION_SMI_HEAP_NAME "smi" + +#define CACHED 1 +#define UNCACHED 0 + +#define ION_CACHE_SHIFT 0 + +#define ION_SET_CACHE(__cache) ((__cache) << ION_CACHE_SHIFT) + +#define ION_IS_CACHED(__flags) ((__flags) & (1 << ION_CACHE_SHIFT)) + +#ifdef __KERNEL__ +#include +#include +struct ion_device; +struct ion_heap; +struct ion_mapper; +struct ion_client; +struct ion_buffer; + +/* This should be removed some day when phys_addr_t's are fully + plumbed in the kernel, and all instances of ion_phys_addr_t should + be converted to phys_addr_t. For the time being many kernel interfaces + do not accept phys_addr_t's that would have to */ +#define ion_phys_addr_t unsigned long + +/** + * struct ion_platform_heap - defines a heap in the given platform + * @type: type of the heap from ion_heap_type enum + * @id: unique identifier for heap. When allocating (lower numbers + * will be allocated from first) + * @name: used for debug purposes + * @base: base address of heap in physical memory if applicable + * @size: size of the heap in bytes if applicable + * + * Provided by the board file. + */ +struct ion_platform_heap { + enum ion_heap_type type; + unsigned int id; + const char *name; + ion_phys_addr_t base; + size_t size; + enum ion_memory_types memory_type; +}; + +/** + * struct ion_platform_data - array of platform heaps passed from board file + * @nr: number of structures in the array + * @heaps: array of platform_heap structions + * + * Provided by the board file in the form of platform data to a platform device. 
+ */ +struct ion_platform_data { + int nr; + struct ion_platform_heap heaps[]; +}; + +#ifdef CONFIG_ION + +/** + * ion_client_create() - allocate a client and returns it + * @dev: the global ion device + * @heap_mask: mask of heaps this client can allocate from + * @name: used for debugging + */ +struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name); + +/** + * msm_ion_client_create - allocate a client using the ion_device specified in + * drivers/gpu/ion/msm/msm_ion.c + * + * heap_mask and name are the same as ion_client_create, return values + * are the same as ion_client_create. + */ + +struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name); + +/** + * ion_client_destroy() - free's a client and all it's handles + * @client: the client + * + * Free the provided client and all it's resources including + * any handles it is holding. + */ +void ion_client_destroy(struct ion_client *client); + +/** + * ion_alloc - allocate ion memory + * @client: the client + * @len: size of the allocation + * @align: requested allocation alignment, lots of hardware blocks have + * alignment requirements of some kind + * @flags: mask of heaps to allocate from, if multiple bits are set + * heaps will be tried in order from lowest to highest order bit + * + * Allocate memory in one of the heaps provided in heap mask and return + * an opaque handle to it. + */ +struct ion_handle *ion_alloc(struct ion_client *client, size_t len, + size_t align, unsigned int flags); + +/** + * ion_free - free a handle + * @client: the client + * @handle: the handle to free + * + * Free the provided handle. + */ +void ion_free(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_phys - returns the physical address and len of a handle + * @client: the client + * @handle: the handle + * @addr: a pointer to put the address in + * @len: a pointer to put the length in + * + * This function queries the heap for a particular handle to get the + * handle's physical address. It't output is only correct if + * a heap returns physically contiguous memory -- in other cases + * this api should not be implemented -- ion_map_dma should be used + * instead. Returns -EINVAL if the handle is invalid. This has + * no implications on the reference counting of the handle -- + * the returned value may not be valid if the caller is not + * holding a reference. + */ +int ion_phys(struct ion_client *client, struct ion_handle *handle, + ion_phys_addr_t *addr, size_t *len); + +/** + * ion_map_kernel - create mapping for the given handle + * @client: the client + * @handle: handle to map + * @flags: flags for this mapping + * + * Map the given handle into the kernel and return a kernel address that + * can be used to access this address. If no flags are specified, this + * will return a non-secure uncached mapping. 
+ */ +void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_kernel() - destroy a kernel mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_map_dma - create a dma mapping for a given handle + * @client: the client + * @handle: handle to map + * + * Return an sglist describing the given handle + */ +struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, + unsigned long flags); + +/** + * ion_unmap_dma() - destroy a dma mapping for a handle + * @client: the client + * @handle: handle to unmap + */ +void ion_unmap_dma(struct ion_client *client, struct ion_handle *handle); + +/** + * ion_share() - given a handle, obtain a buffer to pass to other clients + * @client: the client + * @handle: the handle to share + * + * Given a handle, return a buffer, which exists in a global name + * space, and can be passed to other clients. Should be passed into ion_import + * to obtain a new handle for this buffer. + * + * NOTE: This function does do not an extra reference. The burden is on the + * caller to make sure the buffer doesn't go away while it's being passed to + * another client. That is, ion_free should not be called on this handle until + * the buffer has been imported into the other client. + */ +struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle); + +/** + * ion_import() - given an buffer in another client, import it + * @client: this blocks client + * @buffer: the buffer to import (as obtained from ion_share) + * + * Given a buffer, add it to the client and return the handle to use to refer + * to it further. This is called to share a handle from one kernel client to + * another. + */ +struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer); + +/** + * ion_import_fd() - given an fd obtained via ION_IOC_SHARE ioctl, import it + * @client: this blocks client + * @fd: the fd + * + * A helper function for drivers that will be recieving ion buffers shared + * with them from userspace. These buffers are represented by a file + * descriptor obtained as the return from the ION_IOC_SHARE ioctl. + * This function coverts that fd into the underlying buffer, and returns + * the handle to use to refer to it further. + */ +struct ion_handle *ion_import_fd(struct ion_client *client, int fd); + +/** + * ion_handle_get_flags - get the flags for a given handle + * + * @client - client who allocated the handle + * @handle - handle to get the flags + * @flags - pointer to store the flags + * + * Gets the current flags for a handle. These flags indicate various options + * of the buffer (caching, security, etc.) 
+ */ +int ion_handle_get_flags(struct ion_client *client, struct ion_handle *handle, + unsigned long *flags); + +#else +static inline struct ion_client *ion_client_create(struct ion_device *dev, + unsigned int heap_mask, const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_client *msm_ion_client_create(unsigned int heap_mask, + const char *name) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_client_destroy(struct ion_client *client) { } + +static inline struct ion_handle *ion_alloc(struct ion_client *client, + size_t len, size_t align, unsigned int flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_free(struct ion_client *client, + struct ion_handle *handle) { } + + +static inline int ion_phys(struct ion_client *client, + struct ion_handle *handle, ion_phys_addr_t *addr, size_t *len) +{ + return -ENODEV; +} + +static inline void *ion_map_kernel(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_kernel(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct scatterlist *ion_map_dma(struct ion_client *client, + struct ion_handle *handle, unsigned long flags) +{ + return ERR_PTR(-ENODEV); +} + +static inline void ion_unmap_dma(struct ion_client *client, + struct ion_handle *handle) { } + +static inline struct ion_buffer *ion_share(struct ion_client *client, + struct ion_handle *handle) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import(struct ion_client *client, + struct ion_buffer *buffer) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct ion_handle *ion_import_fd(struct ion_client *client, + int fd) +{ + return ERR_PTR(-ENODEV); +} + +static inline int ion_handle_get_flags(struct ion_client *client, + struct ion_handle *handle, unsigned long *flags) +{ + return -ENODEV; +} +#endif /* CONFIG_ION */ +#endif /* __KERNEL__ */ + +/** + * DOC: Ion Userspace API + * + * create a client by opening /dev/ion + * most operations handled via following ioctls + * + */ + +/** + * struct ion_allocation_data - metadata passed from userspace for allocations + * @len: size of the allocation + * @align: required alignment of the allocation + * @flags: flags passed to heap + * @handle: pointer that will be populated with a cookie to use to refer + * to this allocation + * + * Provided by userspace as an argument to the ioctl + */ +struct ion_allocation_data { + size_t len; + size_t align; + unsigned int flags; + struct ion_handle *handle; +}; + +/** + * struct ion_fd_data - metadata passed to/from userspace for a handle/fd pair + * @handle: a handle + * @fd: a file descriptor representing that handle + * + * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with + * the handle returned from ion alloc, and the kernel returns the file + * descriptor to share or map in the fd field. For ION_IOC_IMPORT, userspace + * provides the file descriptor and the kernel returns the handle. 
+ */ +struct ion_fd_data { + struct ion_handle *handle; + int fd; +}; + +/** + * struct ion_handle_data - a handle passed to/from the kernel + * @handle: a handle + */ +struct ion_handle_data { + struct ion_handle *handle; +}; + +/** + * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl + * @cmd: the custom ioctl function to call + * @arg: additional data to pass to the custom ioctl, typically a user + * pointer to a predefined structure + * + * This works just like the regular cmd and arg fields of an ioctl. + */ +struct ion_custom_data { + unsigned int cmd; + unsigned long arg; +}; + + +/* struct ion_flush_data - data passed to ion for flushing caches + * + * @handle: handle with data to flush + * @vaddr: userspace virtual address mapped with mmap + * @offset: offset into the handle to flush + * @length: length of handle to flush + * + * Performs cache operations on the handle. If p is the start address + * of the handle, p + offset through p + offset + length will have + * the cache operations performed + */ +struct ion_flush_data { + struct ion_handle *handle; + void *vaddr; + unsigned int offset; + unsigned int length; +}; + +/* struct ion_flag_data - information about flags for this buffer + * + * @handle: handle to get flags from + * @flags: flags of this handle + * + * Takes handle as an input and outputs the flags from the handle + * in the flag field. + */ +struct ion_flag_data { + struct ion_handle *handle; + unsigned long flags; +}; + +#define ION_IOC_MAGIC 'I' + +/** + * DOC: ION_IOC_ALLOC - allocate memory + * + * Takes an ion_allocation_data struct and returns it with the handle field + * populated with the opaque handle for the allocation. + */ +#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \ + struct ion_allocation_data) + +/** + * DOC: ION_IOC_FREE - free memory + * + * Takes an ion_handle_data struct and frees the handle. + */ +#define ION_IOC_FREE _IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data) + +/** + * DOC: ION_IOC_MAP - get a file descriptor to mmap + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be used as an argument to mmap. + */ +#define ION_IOC_MAP _IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data) + +/** + * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation + * + * Takes an ion_fd_data struct with the handle field populated with a valid + * opaque handle. Returns the struct with the fd field set to a file + * descriptor open in the current address space. This file descriptor + * can then be passed to another process. The corresponding opaque handle can + * be retrieved via ION_IOC_IMPORT. + */ +#define ION_IOC_SHARE _IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data) + +/** + * DOC: ION_IOC_IMPORT - imports a shared file descriptor + * + * Takes an ion_fd_data struct with the fd field populated with a valid file + * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle + * filed set to the corresponding opaque handle. 
+ */ +#define ION_IOC_IMPORT _IOWR(ION_IOC_MAGIC, 5, int) + +/** + * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl + * + * Takes the argument of the architecture specific ioctl to call and + * passes appropriate userdata for that ioctl + */ +#define ION_IOC_CUSTOM _IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data) + + +/** + * DOC: ION_IOC_CLEAN_CACHES - clean the caches + * + * Clean the caches of the handle specified. + */ +#define ION_IOC_CLEAN_CACHES _IOWR(ION_IOC_MAGIC, 7, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_INV_CACHES - invalidate the caches + * + * Invalidate the caches of the handle specified. + */ +#define ION_IOC_INV_CACHES _IOWR(ION_IOC_MAGIC, 8, \ + struct ion_flush_data) +/** + * DOC: ION_MSM_IOC_CLEAN_CACHES - clean and invalidate the caches + * + * Clean and invalidate the caches of the handle specified. + */ +#define ION_IOC_CLEAN_INV_CACHES _IOWR(ION_IOC_MAGIC, 9, \ + struct ion_flush_data) + +/** + * DOC: ION_IOC_GET_FLAGS - get the flags of the handle + * + * Gets the flags of the current handle which indicate cachability, + * secure state etc. + */ +#define ION_IOC_GET_FLAGS _IOWR(ION_IOC_MAGIC, 10, \ + struct ion_flag_data) +#endif /* _LINUX_ION_H */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58ae8e00..aabe5a8d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h old mode 100644 new mode 100755 index 56e6cc6b..92b41a5d --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -35,13 +35,18 @@ #define _MSM_KGSL_H #define KGSL_VERSION_MAJOR 3 -#define KGSL_VERSION_MINOR 8 +#define KGSL_VERSION_MINOR 10 /*context flags */ -#define KGSL_CONTEXT_SAVE_GMEM 1 -#define KGSL_CONTEXT_NO_GMEM_ALLOC 2 -#define KGSL_CONTEXT_SUBMIT_IB_LIST 4 -#define KGSL_CONTEXT_CTX_SWITCH 8 +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 + +#define KGSL_CONTEXT_INVALID 0xffffffff /* Memory allocayion flags */ #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000 @@ -57,6 +62,25 @@ #define KGSL_FLAGS_RESERVED1 0x00000040 #define KGSL_FLAGS_RESERVED2 0x00000080 #define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Clock flags to show which clocks should be controled by a given platform */ +#define KGSL_CLK_SRC 0x00000001 +#define KGSL_CLK_CORE 0x00000002 +#define KGSL_CLK_IFACE 0x00000004 +#define KGSL_CLK_MEM 0x00000008 +#define KGSL_CLK_MEM_IFACE 0x00000010 +#define KGSL_CLK_AXI 0x00000020 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; #define KGSL_MAX_PWRLEVELS 5 @@ -74,7 +98,9 @@ enum kgsl_deviceid { enum kgsl_user_mem_type { KGSL_USER_MEM_TYPE_PMEM = 0x00000000, KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, - KGSL_USER_MEM_TYPE_ADDR = 0x00000002 + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + 
KGSL_USER_MEM_TYPE_ION = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000004, }; struct kgsl_devinfo { @@ -111,9 +137,9 @@ struct kgsl_devmemstore { unsigned int sbz5; }; -#define KGSL_DEVICE_MEMSTORE_OFFSET(field) \ - offsetof(struct kgsl_devmemstore, field) - +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) /* timestamp id*/ enum kgsl_timestamp_type { @@ -132,6 +158,7 @@ enum kgsl_property_type { KGSL_PROP_MMU_ENABLE = 0x00000006, KGSL_PROP_INTERRUPT_WAITS = 0x00000007, KGSL_PROP_VERSION = 0x00000008, + KGSL_PROP_GPU_RESET_STAT = 0x00000009 }; struct kgsl_shadowprop { @@ -246,6 +273,14 @@ struct kgsl_device_waittimestamp { #define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \ _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp) +struct kgsl_device_waittimestamp_ctxtid { + unsigned int context_id; + unsigned int timestamp; + unsigned int timeout; +}; + +#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) /* issue indirect commands to the GPU. * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE @@ -339,6 +374,26 @@ struct kgsl_map_user_mem { #define IOCTL_KGSL_MAP_USER_MEM \ _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned int gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + /* add a block of pmem or fb into the GPU address space */ struct kgsl_sharedmem_from_pmem { int pmem_fd; @@ -482,6 +537,14 @@ struct kgsl_timestamp_event_genlock { int handle; /* Handle of the genlock lock to release */ }; +/* + * Set a property within the kernel. Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + #ifdef __KERNEL__ #ifdef CONFIG_MSM_KGSL_DRM int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start, diff --git a/include/linux/msm_mdp.h b/include/linux/msm_mdp.h index a933facc..dcedc554 100644 --- a/include/linux/msm_mdp.h +++ b/include/linux/msm_mdp.h @@ -1,6 +1,7 @@ /* include/linux/msm_mdp.h * * Copyright (C) 2007 Google Incorporated + * Copyright (c) 2012 Code Aurora Forum. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -15,25 +16,90 @@ #define _MSM_MDP_H_ #include +#include #define MSMFB_IOCTL_MAGIC 'm' #define MSMFB_GRP_DISP _IOW(MSMFB_IOCTL_MAGIC, 1, unsigned int) #define MSMFB_BLIT _IOW(MSMFB_IOCTL_MAGIC, 2, unsigned int) +#define MSMFB_SUSPEND_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 128, unsigned int) +#define MSMFB_RESUME_SW_REFRESHER _IOW(MSMFB_IOCTL_MAGIC, 129, unsigned int) +#define MSMFB_CURSOR _IOW(MSMFB_IOCTL_MAGIC, 130, struct fb_cursor) +#define MSMFB_SET_LUT _IOW(MSMFB_IOCTL_MAGIC, 131, struct fb_cmap) +#define MSMFB_HISTOGRAM _IOWR(MSMFB_IOCTL_MAGIC, 132, struct mdp_histogram_data) +/* new ioctls's for set/get ccs matrix */ +#define MSMFB_GET_CCS_MATRIX _IOWR(MSMFB_IOCTL_MAGIC, 133, struct mdp_ccs) +#define MSMFB_SET_CCS_MATRIX _IOW(MSMFB_IOCTL_MAGIC, 134, struct mdp_ccs) +#define MSMFB_OVERLAY_SET _IOWR(MSMFB_IOCTL_MAGIC, 135, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_UNSET _IOW(MSMFB_IOCTL_MAGIC, 136, unsigned int) +#define MSMFB_OVERLAY_PLAY _IOW(MSMFB_IOCTL_MAGIC, 137, \ + struct msmfb_overlay_data) +#define MSMFB_GET_PAGE_PROTECTION _IOR(MSMFB_IOCTL_MAGIC, 138, \ + struct mdp_page_protection) +#define MSMFB_SET_PAGE_PROTECTION _IOW(MSMFB_IOCTL_MAGIC, 139, \ + struct mdp_page_protection) +#define MSMFB_OVERLAY_GET _IOR(MSMFB_IOCTL_MAGIC, 140, \ + struct mdp_overlay) +#define MSMFB_OVERLAY_PLAY_ENABLE _IOW(MSMFB_IOCTL_MAGIC, 141, unsigned int) +#define MSMFB_OVERLAY_BLT _IOWR(MSMFB_IOCTL_MAGIC, 142, \ + struct msmfb_overlay_blt) +#define MSMFB_OVERLAY_BLT_OFFSET _IOW(MSMFB_IOCTL_MAGIC, 143, unsigned int) +#define MSMFB_HISTOGRAM_START _IOR(MSMFB_IOCTL_MAGIC, 144, \ + struct mdp_histogram_start_req) +#define MSMFB_HISTOGRAM_STOP _IOR(MSMFB_IOCTL_MAGIC, 145, unsigned int) +#define MSMFB_NOTIFY_UPDATE _IOW(MSMFB_IOCTL_MAGIC, 146, unsigned int) + +#define MSMFB_OVERLAY_3D _IOWR(MSMFB_IOCTL_MAGIC, 147, \ + struct msmfb_overlay_3d) + +#define MSMFB_MIXER_INFO _IOWR(MSMFB_IOCTL_MAGIC, 148, \ + struct msmfb_mixer_info_req) +#define MSMFB_OVERLAY_PLAY_WAIT _IOWR(MSMFB_IOCTL_MAGIC, 149, \ + struct msmfb_overlay_data) +#define MSMFB_WRITEBACK_INIT _IO(MSMFB_IOCTL_MAGIC, 150) +#define MSMFB_WRITEBACK_START _IO(MSMFB_IOCTL_MAGIC, 151) +#define MSMFB_WRITEBACK_STOP _IO(MSMFB_IOCTL_MAGIC, 152) +#define MSMFB_WRITEBACK_QUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 153, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_DEQUEUE_BUFFER _IOW(MSMFB_IOCTL_MAGIC, 154, \ + struct msmfb_data) +#define MSMFB_WRITEBACK_TERMINATE _IO(MSMFB_IOCTL_MAGIC, 155) +#define MSMFB_MDP_PP _IOWR(MSMFB_IOCTL_MAGIC, 156, struct msmfb_mdp_pp) + +#define FB_TYPE_3D_PANEL 0x10101010 +#define MDP_IMGTYPE2_START 0x10000 +#define MSMFB_DRIVER_VERSION 0xF9E8D701 enum { - MDP_RGB_565, /* RGB 565 planar */ + NOTIFY_UPDATE_START, + NOTIFY_UPDATE_STOP, +}; + +enum { + MDP_RGB_565, /* RGB 565 planer */ MDP_XRGB_8888, /* RGB 888 padded */ - MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planar w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planer w/ Cb is in MSB */ + MDP_Y_CBCR_H2V2_ADRENO, MDP_ARGB_8888, /* ARGB 888 */ - MDP_RGB_888, /* RGB 888 planar */ - MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planar w/ Cr is in MSB */ + MDP_RGB_888, /* RGB 888 planer */ + MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planer w/ Cr is in MSB */ MDP_YCRYCB_H2V1, /* YCrYCb interleave */ - MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ + 
MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ MDP_RGBA_8888, /* ARGB 888 */ MDP_BGRA_8888, /* ABGR 888 */ MDP_RGBX_8888, /* RGBX 888 */ - MDP_IMGTYPE_LIMIT /* Non valid image type after this enum */ + MDP_Y_CRCB_H2V2_TILE, /* Y and CrCb, pseudo planer tile */ + MDP_Y_CBCR_H2V2_TILE, /* Y and CbCr, pseudo planer tile */ + MDP_Y_CR_CB_H2V2, /* Y, Cr and Cb, planar */ + MDP_Y_CR_CB_GH2V2, /* Y, Cr and Cb, planar aligned to Android YV12 */ + MDP_Y_CB_CR_H2V2, /* Y, Cb and Cr, planar */ + MDP_Y_CRCB_H1V1, /* Y and CrCb, pseduo planer w/ Cr is in MSB */ + MDP_Y_CBCR_H1V1, /* Y and CbCr, pseduo planer w/ Cb is in MSB */ + MDP_IMGTYPE_LIMIT, + MDP_BGR_565 = MDP_IMGTYPE2_START, /* BGR 565 planer */ + MDP_FB_FORMAT, /* framebuffer format */ + MDP_IMGTYPE_LIMIT2 /* Non valid image type after this enum */ }; enum { @@ -41,24 +107,57 @@ enum { FB_IMG, }; -/* flag values */ +enum { + HSIC_HUE = 0, + HSIC_SAT, + HSIC_INT, + HSIC_CON, + NUM_HSIC_PARAM, +}; + +/* mdp_blit_req flag values */ #define MDP_ROT_NOP 0 #define MDP_FLIP_LR 0x1 #define MDP_FLIP_UD 0x2 #define MDP_ROT_90 0x4 #define MDP_ROT_180 (MDP_FLIP_UD|MDP_FLIP_LR) #define MDP_ROT_270 (MDP_ROT_90|MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_ROT_MASK 0x7 #define MDP_DITHER 0x8 #define MDP_BLUR 0x10 #define MDP_BLEND_FG_PREMULT 0x20000 +#define MDP_DEINTERLACE 0x80000000 +#define MDP_SHARPENING 0x40000000 +#define MDP_NO_DMA_BARRIER_START 0x20000000 +#define MDP_NO_DMA_BARRIER_END 0x10000000 +#define MDP_NO_BLIT 0x08000000 +#define MDP_BLIT_WITH_DMA_BARRIERS 0x000 +#define MDP_BLIT_WITH_NO_DMA_BARRIERS \ + (MDP_NO_DMA_BARRIER_START | MDP_NO_DMA_BARRIER_END) +#define MDP_BLIT_SRC_GEM 0x04000000 +#define MDP_BLIT_DST_GEM 0x02000000 +#define MDP_BLIT_NON_CACHED 0x01000000 +#define MDP_OV_PIPE_SHARE 0x00800000 +#define MDP_DEINTERLACE_ODD 0x00400000 +#define MDP_OV_PLAY_NOWAIT 0x00200000 +#define MDP_SOURCE_ROTATED_90 0x00100000 +#define MDP_DPP_HSIC 0x00080000 +#define MDP_BACKEND_COMPOSITION 0x00040000 +#define MDP_BORDERFILL_SUPPORTED 0x00010000 +#define MDP_SECURE_OVERLAY_SESSION 0x00008000 +#define MDP_MEMORY_ID_TYPE_FB 0x00001000 #define MDP_TRANSP_NOP 0xffffffff #define MDP_ALPHA_NOP 0xff -/* drewis: added for android 4.0 */ -#define MDP_BLIT_NON_CACHED 0x01000000 -/* drewis: end */ +#define MDP_FB_PAGE_PROTECTION_NONCACHED (0) +#define MDP_FB_PAGE_PROTECTION_WRITECOMBINE (1) +#define MDP_FB_PAGE_PROTECTION_WRITETHROUGHCACHE (2) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKCACHE (3) +#define MDP_FB_PAGE_PROTECTION_WRITEBACKWACACHE (4) +/* Sentinel: Don't use! */ +#define MDP_FB_PAGE_PROTECTION_INVALID (5) +/* Count of the number of MDP_FB_PAGE_PROTECTION_... values. 
*/ +#define MDP_NUM_FB_PAGE_PROTECTION_VALUES (5) struct mdp_rect { uint32_t x; @@ -73,8 +172,41 @@ struct mdp_img { uint32_t format; uint32_t offset; int memory_id; /* the file descriptor */ + uint32_t priv; }; +/* + * {3x3} + {3} ccs matrix + */ + +#define MDP_CCS_RGB2YUV 0 +#define MDP_CCS_YUV2RGB 1 + +#define MDP_CCS_SIZE 9 +#define MDP_BV_SIZE 3 + +struct mdp_ccs { + int direction; /* MDP_CCS_RGB2YUV or YUV2RGB */ + uint16_t ccs[MDP_CCS_SIZE]; /* 3x3 color coefficients */ + uint16_t bv[MDP_BV_SIZE]; /* 1x3 bias vector */ +}; + +struct mdp_csc { + int id; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +/* The version of the mdp_blit_req structure so that + * user applications can selectively decide which functionality + * to include + */ + +#define MDP_BLIT_REQ_VERSION 2 + struct mdp_blit_req { struct mdp_img src; struct mdp_img dst; @@ -83,6 +215,7 @@ struct mdp_blit_req { uint32_t alpha; uint32_t transp_mask; uint32_t flags; + int sharpening_strength; /* -127 <--> 127, default 64 */ }; struct mdp_blit_req_list { @@ -90,4 +223,289 @@ struct mdp_blit_req_list { struct mdp_blit_req req[]; }; +#define MSMFB_DATA_VERSION 2 + +struct msmfb_data { + uint32_t offset; + int memory_id; + int id; + uint32_t flags; + uint32_t priv; + uint32_t iova; +}; + +#define MSMFB_NEW_REQUEST -1 + +struct msmfb_overlay_data { + uint32_t id; + struct msmfb_data data; + uint32_t version_key; + struct msmfb_data plane1_data; + struct msmfb_data plane2_data; +}; + +struct msmfb_img { + uint32_t width; + uint32_t height; + uint32_t format; +}; + +#define MSMFB_WRITEBACK_DEQUEUE_BLOCKING 0x1 +struct msmfb_writeback_data { + struct msmfb_data buf_info; + struct msmfb_img img; +}; + +struct dpp_ctrl { + /* + *'sharp_strength' has inputs = -128 <-> 127 + * Increasingly positive values correlate with increasingly sharper + * picture. Increasingly negative values correlate with increasingly + * smoothed picture. + */ + int8_t sharp_strength; + int8_t hsic_params[NUM_HSIC_PARAM]; +}; + +struct mdp_overlay { + struct msmfb_img src; + struct mdp_rect src_rect; + struct mdp_rect dst_rect; + uint32_t z_order; /* stage number */ + uint32_t is_fg; /* control alpha & transp */ + uint32_t alpha; + uint32_t transp_mask; + uint32_t flags; + uint32_t id; + uint32_t user_data[8]; + struct dpp_ctrl dpp; +}; + +struct msmfb_overlay_3d { + uint32_t is_3d; + uint32_t width; + uint32_t height; +}; + + +struct msmfb_overlay_blt { + uint32_t enable; + uint32_t offset; + uint32_t width; + uint32_t height; + uint32_t bpp; +}; + +struct mdp_histogram { + uint32_t frame_cnt; + uint32_t bin_cnt; + uint32_t *r; + uint32_t *g; + uint32_t *b; +}; + + +/* + + mdp_block_type defines the identifiers for each of pipes in MDP 4.3 + + MDP_BLOCK_RESERVED is provided for backward compatibility and is + deprecated. It corresponds to DMA_P. So MDP_BLOCK_DMA_P should be used + instead. 
+ +*/ + +enum { + MDP_BLOCK_RESERVED = 0, + MDP_BLOCK_OVERLAY_0, + MDP_BLOCK_OVERLAY_1, + MDP_BLOCK_VG_1, + MDP_BLOCK_VG_2, + MDP_BLOCK_RGB_1, + MDP_BLOCK_RGB_2, + MDP_BLOCK_DMA_P, + MDP_BLOCK_DMA_S, + MDP_BLOCK_DMA_E, + MDP_BLOCK_MAX, +}; + +/* +mdp_histogram_start_req is used to provide the parameters for +histogram start request +*/ + +struct mdp_histogram_start_req { + uint32_t block; + uint8_t frame_cnt; + uint8_t bit_mask; + uint8_t num_bins; +}; + + +/* + + mdp_histogram_data is used to return the histogram data, once + the histogram is done/stopped/cance + + */ + + +struct mdp_histogram_data { + uint32_t block; + uint8_t bin_cnt; + uint32_t *c0; + uint32_t *c1; + uint32_t *c2; + uint32_t *extra_info; +}; + +struct mdp_pcc_coeff { + uint32_t c, r, g, b, rr, gg, bb, rg, gb, rb, rgb_0, rgb_1; +}; + +struct mdp_pcc_cfg_data { + uint32_t block; + uint32_t ops; + struct mdp_pcc_coeff r, g, b; +}; + +#define MDP_CSC_FLAG_ENABLE 0x1 +#define MDP_CSC_FLAG_YUV_IN 0x2 +#define MDP_CSC_FLAG_YUV_OUT 0x4 + +struct mdp_csc_cfg { + /* flags for enable CSC, toggling RGB,YUV input/output */ + uint32_t flags; + uint32_t csc_mv[9]; + uint32_t csc_pre_bv[3]; + uint32_t csc_post_bv[3]; + uint32_t csc_pre_lv[6]; + uint32_t csc_post_lv[6]; +}; + +struct mdp_csc_cfg_data { + uint32_t block; + struct mdp_csc_cfg csc_data; +}; + +enum { + mdp_lut_igc, + mdp_lut_pgc, + mdp_lut_hist, + mdp_lut_max, +}; + + +struct mdp_igc_lut_data { + uint32_t block; + uint32_t len, ops; + uint32_t *c0_c1_data; + uint32_t *c2_data; +}; + +struct mdp_ar_gc_lut_data { + uint32_t x_start; + uint32_t slope; + uint32_t offset; +}; + +struct mdp_pgc_lut_data { + uint32_t block; + uint32_t flags; + uint8_t num_r_stages; + uint8_t num_g_stages; + uint8_t num_b_stages; + struct mdp_ar_gc_lut_data *r_data; + struct mdp_ar_gc_lut_data *g_data; + struct mdp_ar_gc_lut_data *b_data; +}; + + +struct mdp_hist_lut_data { + uint32_t block; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +struct mdp_lut_cfg_data { + uint32_t lut_type; + union { + struct mdp_igc_lut_data igc_lut_data; + struct mdp_pgc_lut_data pgc_lut_data; + struct mdp_hist_lut_data hist_lut_data; + } data; +}; + +struct mdp_qseed_cfg_data { + uint32_t block; + uint32_t table_num; + uint32_t ops; + uint32_t len; + uint32_t *data; +}; + + +enum { + mdp_op_pcc_cfg, + mdp_op_csc_cfg, + mdp_op_lut_cfg, + mdp_op_qseed_cfg, + mdp_op_max, +}; + +struct msmfb_mdp_pp { + uint32_t op; + union { + struct mdp_pcc_cfg_data pcc_cfg_data; + struct mdp_csc_cfg_data csc_cfg_data; + struct mdp_lut_cfg_data lut_cfg_data; + struct mdp_qseed_cfg_data qseed_cfg_data; + } data; +}; + + +struct mdp_page_protection { + uint32_t page_protection; +}; + + +struct mdp_mixer_info { + int pndx; + int pnum; + int ptype; + int mixer_num; + int z_order; +}; + +#define MAX_PIPE_PER_MIXER 4 + +struct msmfb_mixer_info_req { + int mixer_num; + int cnt; + struct mdp_mixer_info info[MAX_PIPE_PER_MIXER]; +}; + +enum { + DISPLAY_SUBSYSTEM_ID, + ROTATOR_SUBSYSTEM_ID, +}; + +#ifdef __KERNEL__ + +/* get the framebuffer physical address information */ +int get_fb_phys_info(unsigned long *start, unsigned long *len, int fb_num, + int subsys_id); +struct fb_info *msm_fb_get_writeback_fb(void); +int msm_fb_writeback_init(struct fb_info *info); +int msm_fb_writeback_start(struct fb_info *info); +int msm_fb_writeback_queue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_dequeue_buffer(struct fb_info *info, + struct msmfb_data *data); +int msm_fb_writeback_stop(struct fb_info *info); +int 
msm_fb_writeback_terminate(struct fb_info *info); +#endif + #endif /* _MSM_MDP_H_ */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2ac98852..748f853b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,7 +26,7 @@ #define __HCI_CORE_H #include - +#include /* HCI upper protocols */ #define HCI_PROTO_L2CAP 0 #define HCI_PROTO_SCO 1 @@ -183,10 +183,11 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; + struct timer_list auto_accept_timer; struct work_struct work_add; struct work_struct work_del; - + struct wake_lock idle_lock; struct device dev; atomic_t devref; @@ -246,6 +247,7 @@ enum { HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, + HCI_CONN_SCO_SETUP_PEND, }; static inline void hci_conn_hash_init(struct hci_dev *hdev) @@ -326,6 +328,7 @@ void hci_acl_connect(struct hci_conn *conn); void hci_acl_disconn(struct hci_conn *conn, __u8 reason); void hci_add_sco(struct hci_conn *conn, __u16 handle); void hci_setup_sync(struct hci_conn *conn, __u16 handle); +void hci_sco_setup(struct hci_conn *conn, __u8 status); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, __u16 pkt_type, bdaddr_t *dst); diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 0d940835..152922a4 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -10,7 +10,7 @@ SCHED_FEAT(FAIR_SLEEPERS, 1) * them to run sooner, but does not allow tons of sleepers to * rip the spread apart. */ -SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) +SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 0) /* * By not normalizing the sleep time, heavy tasks get an effective diff --git a/mm/ashmem.c b/mm/ashmem.c old mode 100644 new mode 100755 index 5e059283..365fed2e --- a/mm/ashmem.c +++ b/mm/ashmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #define ASHMEM_NAME_PREFIX "" #define ASHMEM_NAME_PREFIX_LEN 0 @@ -45,6 +46,8 @@ struct ashmem_area { struct list_head unpinned_list; /* list of all ashmem areas */ struct file *file; /* the shmem-based backing file */ size_t size; /* size of the mapping, in bytes */ + unsigned long vm_start; /* Start address of vm_area + * which maps this ashmem */ unsigned long prot_mask; /* allowed prot bits, as vm_flags */ }; @@ -178,7 +181,7 @@ static int ashmem_open(struct inode *inode, struct file *file) struct ashmem_area *asma; int ret; - ret = nonseekable_open(inode, file); + ret = generic_file_open(inode, file); if (unlikely(ret)) return ret; @@ -210,6 +213,67 @@ static int ashmem_release(struct inode *ignored, struct file *file) return 0; } +static ssize_t ashmem_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct ashmem_area *asma = file->private_data; + int ret = 0; + + mutex_lock(&ashmem_mutex); + + /* If size is not set, or set to 0, always return EOF. 
*/ + if (asma->size == 0) { + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->read(asma->file, buf, len, pos); + if (ret < 0) { + goto out; + } + + /** Update backing file pos, since f_ops->read() doesn't */ + asma->file->f_pos = *pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + +static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) +{ + struct ashmem_area *asma = file->private_data; + int ret; + + mutex_lock(&ashmem_mutex); + + if (asma->size == 0) { + ret = -EINVAL; + goto out; + } + + if (!asma->file) { + ret = -EBADF; + goto out; + } + + ret = asma->file->f_op->llseek(asma->file, offset, origin); + if (ret < 0) { + goto out; + } + + /** Copy f_pos from backing file, since f_ops->llseek() sets it */ + file->f_pos = asma->file->f_pos; + +out: + mutex_unlock(&ashmem_mutex); + return ret; +} + static inline unsigned long calc_vm_may_flags(unsigned long prot) { @@ -264,6 +328,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_file = asma->file; } vma->vm_flags |= VM_CAN_NONLINEAR; + asma->vm_start = vma->vm_start; out: mutex_unlock(&ashmem_mutex); @@ -564,6 +629,69 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return ret; } +#ifdef CONFIG_OUTER_CACHE +static unsigned int virtaddr_to_physaddr(unsigned int virtaddr) +{ + unsigned int physaddr = 0; + pgd_t *pgd_ptr = NULL; + pmd_t *pmd_ptr = NULL; + pte_t *pte_ptr = NULL, pte; + + spin_lock(¤t->mm->page_table_lock); + pgd_ptr = pgd_offset(current->mm, virtaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + pr_err("Failed to convert virtaddr %x to pgd_ptr\n", + virtaddr); + goto done; + } + + pmd_ptr = pmd_offset(pgd_ptr, virtaddr); + if (pmd_none(*pmd_ptr) || pmd_bad(*pmd_ptr)) { + pr_err("Failed to convert pgd_ptr %p to pmd_ptr\n", + (void *)pgd_ptr); + goto done; + } + + pte_ptr = pte_offset_map(pmd_ptr, virtaddr); + if (!pte_ptr) { + pr_err("Failed to convert pmd_ptr %p to pte_ptr\n", + (void *)pmd_ptr); + goto done; + } + pte = *pte_ptr; + physaddr = pte_pfn(pte); + pte_unmap(pte_ptr); +done: + spin_unlock(¤t->mm->page_table_lock); + physaddr <<= PAGE_SHIFT; + return physaddr; +} +#endif + +static int ashmem_cache_op(struct ashmem_area *asma, + void (*cache_func)(unsigned long vstart, unsigned long length, + unsigned long pstart)) +{ +#ifdef CONFIG_OUTER_CACHE + unsigned long vaddr; +#endif + mutex_lock(&ashmem_mutex); +#ifndef CONFIG_OUTER_CACHE + cache_func(asma->vm_start, asma->size, 0); +#else + for (vaddr = asma->vm_start; vaddr < asma->vm_start + asma->size; + vaddr += PAGE_SIZE) { + unsigned long physaddr; + physaddr = virtaddr_to_physaddr(vaddr); + if (!physaddr) + return -EINVAL; + cache_func(vaddr, PAGE_SIZE, physaddr); + } +#endif + mutex_unlock(&ashmem_mutex); + return 0; +} + static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; @@ -604,6 +732,15 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink(ret, GFP_KERNEL); } break; + case ASHMEM_CACHE_FLUSH_RANGE: + ret = ashmem_cache_op(asma, &clean_and_invalidate_caches); + break; + case ASHMEM_CACHE_CLEAN_RANGE: + ret = ashmem_cache_op(asma, &clean_caches); + break; + case ASHMEM_CACHE_INV_RANGE: + ret = ashmem_cache_op(asma, &invalidate_caches); + break; } return ret; @@ -666,6 +803,8 @@ static struct file_operations ashmem_fops = { .owner = THIS_MODULE, .open = ashmem_open, .release = ashmem_release, + .read = 
ashmem_read, + .llseek = ashmem_llseek, .mmap = ashmem_mmap, .unlocked_ioctl = ashmem_ioctl, .compat_ioctl = ashmem_ioctl, diff --git a/mm/ksm.c b/mm/ksm.c old mode 100644 new mode 100755 index e9501f83..d8ce84ad --- a/mm/ksm.c +++ b/mm/ksm.c @@ -163,9 +163,6 @@ static unsigned long ksm_pages_unshared; /* The number of rmap_items in use: to calculate pages_volatile */ static unsigned long ksm_rmap_items; -/* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages; - /* Number of pages ksmd should scan in one batch */ static unsigned int ksm_thread_pages_to_scan = 100; @@ -317,7 +314,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, @@ -391,7 +388,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) goto out; page = follow_page(vma, addr, FOLL_GET); - if (!page) + if (IS_ERR_OR_NULL(page)) goto out; if (PageAnon(page)) { flush_anon_page(vma, page, addr); @@ -628,7 +625,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, if (!ptep) goto out; - if (pte_write(*ptep)) { + if (pte_write(*ptep) || pte_dirty(*ptep)) { pte_t entry; swapped = PageSwapCache(page); @@ -648,10 +645,12 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * page */ if ((page_mapcount(page) + 2 + swapped) != page_count(page)) { - set_pte_at_notify(mm, addr, ptep, entry); + set_pte_at(mm, addr, ptep, entry); goto out_unlock; } - entry = pte_wrprotect(entry); + if (pte_dirty(entry)) + set_page_dirty(page); + entry = pte_mkclean(pte_wrprotect(entry)); set_pte_at_notify(mm, addr, ptep, entry); } *orig_pte = *ptep; @@ -717,6 +716,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *oldpage, set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot)); page_remove_rmap(oldpage); + if (!page_mapped(oldpage)) + try_to_free_swap(oldpage); put_page(oldpage); pte_unmap_unlock(ptep, ptl); @@ -827,13 +828,6 @@ static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1, struct page *kpage; int err = -EFAULT; - /* - * The number of nodes in the stable tree - * is the number of kernel pages that we hold. - */ - if (ksm_max_kernel_pages && - ksm_max_kernel_pages <= ksm_pages_shared) - return err; kpage = alloc_page(GFP_HIGHUSER); if (!kpage) @@ -1209,6 +1203,18 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) slot = ksm_scan.mm_slot; if (slot == &ksm_mm_head) { + /* + * A number of pages can hang around indefinitely on per-cpu + * pagevecs, raised page count preventing write_protect_page + * from merging them. Though it doesn't really matter much, + * it is puzzling to see some stuck in pages_volatile until + * other activity jostles them out, and they also prevented + * LTP's KSM test from succeeding deterministically; so drain + * them here (here rather than on entry to ksm_do_scan(), + * so we don't IPI too often when pages_to_scan is set low). 
+ */ + lru_add_drain_all(); + root_unstable_tree = RB_ROOT; spin_lock(&ksm_mmlist_lock); @@ -1314,7 +1320,7 @@ next_mm: static void ksm_do_scan(unsigned int scan_npages) { struct rmap_item *rmap_item; - struct page *page; + struct page *uninitialized_var(page); while (scan_npages--) { cond_resched(); @@ -1577,29 +1583,6 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, } KSM_ATTR(run); -static ssize_t max_kernel_pages_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err; - unsigned long nr_pages; - - err = strict_strtoul(buf, 10, &nr_pages); - if (err) - return -EINVAL; - - ksm_max_kernel_pages = nr_pages; - - return count; -} - -static ssize_t max_kernel_pages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", ksm_max_kernel_pages); -} -KSM_ATTR(max_kernel_pages); - static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1649,7 +1632,6 @@ static struct attribute *ksm_attrs[] = { &sleep_millisecs_attr.attr, &pages_to_scan_attr.attr, &run_attr.attr, - &max_kernel_pages_attr.attr, &pages_shared_attr.attr, &pages_sharing_attr.attr, &pages_unshared_attr.attr, @@ -1669,8 +1651,6 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; - ksm_max_kernel_pages = totalram_pages / 4; - err = ksm_slab_init(); if (err) goto out; diff --git a/mm/vmalloc.c b/mm/vmalloc.c old mode 100644 new mode 100755 index c2287313..b689e2ae --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1470,6 +1470,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, { struct page **pages; unsigned int nr_pages, array_size, i; + gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1477,13 +1478,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. 
*/ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO, + pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, PAGE_KERNEL, node, caller); area->flags |= VM_VPAGES; } else { - pages = kmalloc_node(array_size, - (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO, - node); + pages = kmalloc_node(array_size, nested_gfp, node); } area->pages = pages; area->caller = caller; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2f4d30fd..c22bc17c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +/* Device _must_ be locked */ +void hci_sco_setup(struct hci_conn *conn, __u8 status) +{ + struct hci_conn *sco = conn->link; + + BT_DBG("%p", conn); + + if (!sco) + return; + + if (!status) { + if (lmp_esco_capable(conn->hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } +} + static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -216,6 +237,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; + wake_lock_init(&conn->idle_lock, WAKE_LOCK_SUSPEND, "bt_idle"); switch (type) { case ACL_LINK: @@ -271,9 +293,11 @@ int hci_conn_del(struct hci_conn *conn) BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); + /* Make sure no timers are running */ del_timer(&conn->idle_timer); - + wake_lock_destroy(&conn->idle_lock); del_timer(&conn->disc_timer); + del_timer(&conn->auto_accept_timer); if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; @@ -521,9 +545,11 @@ void hci_conn_enter_active_mode(struct hci_conn *conn) } timer: - if (hdev->idle_timeout > 0) + if (hdev->idle_timeout > 0) { mod_timer(&conn->idle_timer, jiffies + msecs_to_jiffies(hdev->idle_timeout)); + wake_lock(&conn->idle_lock); + } } /* Enter sniff mode */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8c59afcc..6b0b59c4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1482,6 +1482,12 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb else conn->power_save = 0; } + if (conn->mode == HCI_CM_SNIFF) + if (wake_lock_active(&conn->idle_lock)) + wake_unlock(&conn->idle_lock); + + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, ev->status); } hci_dev_unlock(hdev); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 4fa12857..10640fdd 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,6 +22,7 @@ #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_TPROXY_HAVE_IPV6 1 #include @@ -29,6 +30,7 @@ #include #include #endif +*/ #include #include diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9b38fd15..0f10dfc6 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,11 +22,13 @@ #include #include +/* #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) #define XT_SOCKET_HAVE_IPV6 1 #include #include #endif +*/ #include