android_hardware_qcom_display/libcopybit/software_converter.cpp

/*
 * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *     * Neither the name of Code Aurora Forum, Inc. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#define LOG_TAG "copybit"
#include <cutils/log.h>
#include <stdlib.h>
#include <errno.h>
#include "software_converter.h"
#include "gralloc_priv.h"

/** Convert YV12 to YCrCb_420_SP */
int convertYV12toYCrCb420SP(const copybit_image_t *src)
{
    private_handle_t* hnd = (private_handle_t*)src->handle;

    if(hnd == NULL){
        LOGE("Invalid handle");
        return -1;
    }

    // Please refer to the description of YV12 in hardware.h
    // for the formulae used to calculate buffer sizes and offsets

    // In a copybit_image_t, w is the stride and
    // stride - horiz_padding is the actual width
    // vertical stride is the same as height, so not considered
    unsigned int   stride  = src->w;
    unsigned int   width   = src->w - src->horiz_padding;
    unsigned int   height  = src->h;
    unsigned int   padding = src->horiz_padding;
    unsigned int   y_size  = stride * src->h;
    unsigned int   c_width = ALIGN(stride/2, 16);
    unsigned int   c_size  = c_width * src->h/2;
    unsigned char* chroma  = (unsigned char *) (hnd->base + y_size);
    unsigned int   tempBufSize = c_size * 2;
    unsigned char* tempBuf = (unsigned char*) malloc (tempBufSize);

    if(tempBuf == NULL) {
        LOGE("Failed to allocate temporary buffer");
        return -errno;
    }

#ifdef __ARM_HAVE_NEON
    /* copy into temp buffer */

    unsigned char * t1 = chroma;
    unsigned char * t2 = tempBuf;

#ifdef TARGET_7x27A
    // Since the Sparrow core on 7x27A has a performance issue
    // with reading from uncached memory using Neon instructions,
    // use regular ARM instructions to copy the buffer on this
    // target. There is no issue with storing, hence using
    // Neon instructions for interleaving
    for(unsigned int i=0; i < (tempBufSize>>5); i++) {
        __asm__ __volatile__ (
                                "LDMIA %0!, {r3 - r10} \n"
                                "STMIA %1!, {r3 - r10} \n"
                                :"+r"(t1), "+r"(t2)
                                :
                                :"memory","r3","r4","r5",
                                "r6","r7","r8","r9","r10"
                             );

    }
#else
    for(unsigned int i=0; i < (tempBufSize>>5); i++) {
        __asm__ __volatile__ (
                                "vld1.u8 {d0-d3}, [%0]! \n"
                                "vst1.u8 {d0-d3}, [%1]! \n"
                                :"+r"(t1), "+r"(t2)
                                :
                                :"memory","d0","d1","d2","d3"
                             );

    }
#endif //TARGET_7x27A

    /* interleave */
    if(!padding) {
        t1 = chroma;
        t2 = tempBuf;
        unsigned char * t3 = t2 + tempBufSize/2;
        for(unsigned int i=0; i < (tempBufSize/2)>>3; i++) {
            __asm__ __volatile__ (
                                    "vld1.u8 d0, [%0]! \n"
                                    "vld1.u8 d1, [%1]! \n"
                                    "vst2.u8 {d0, d1}, [%2]! \n"
                                    :"+r"(t2), "+r"(t3), "+r"(t1)
                                    :
                                    :"memory","d0","d1"
                                 );

        }
    }
#else  //__ARM_HAVE_NEON
    memcpy(tempBuf, chroma, tempBufSize);
    if(!padding) {
        for(unsigned int i = 0; i< tempBufSize/2; i++) {
            chroma[i*2]   = tempBuf[i];
            chroma[i*2+1] = tempBuf[i+tempBufSize/2];
        }

    }
#endif
    // If the image is not aligned to 16 pixels,
    // convert using the C routine below
    // r1 tracks the row of the source buffer
    // r2 tracks the row of the destination buffer
    // The width/2 checks are to avoid copying
    // from the padding

    if(padding) {
        unsigned int r1 = 0, r2 = 0, i = 0, j = 0;
        while(r1 < height/2) {
            if(j == width/2) {
                j = 0;
                r2++;
                continue;
            }
            if (j+1 == width/2) {
                chroma[r2*c_width + j] = tempBuf[r1*c_width+i];
                r2++;
                chroma[r2*c_width] = tempBuf[r1*c_width+i+c_size];
                j = 1;
            } else {
                chroma[r2*c_width + j] = tempBuf[r1*c_width+i];
                chroma[r2*c_width + j + 1] = tempBuf[r1*c_width+i+c_size];
                j+=2;
            }
            i++;
            if (i == width/2 ) {
                i = 0;
                r1++;
            }
        }
    }

    if(tempBuf)
        free(tempBuf);
    return 0;
}
copybit: Enable copybit for 7K targets Change-Id: Ie88f36d54d2eac51bcb5ea6c1793b41e4573ca53 2011-11-21 10:58:17 +00:00			`/*`
			`* Copyright (c) 2011, Code Aurora Forum. All rights reserved.`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions are`
			`* met:`
			`* * Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* * Redistributions in binary form must reproduce the above`
			`* copyright notice, this list of conditions and the following`
			`* disclaimer in the documentation and/or other materials provided`
			`* with the distribution.`
			`* * Neither the name of Code Aurora Forum, Inc. nor the names of its`
			`* contributors may be used to endorse or promote products derived`
			`* from this software without specific prior written permission.`
			`*`
			`* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED`
			`* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF`
			`* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT`
			`* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS`
			`* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR`
			`* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,`
			`* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE`
			`* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN`
			`* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`

			`#define LOG_TAG "copybit"`
			`#include <cutils/log.h>`
			`#include <stdlib.h>`
			`#include <errno.h>`
			`#include "software_converter.h"`
			`#include "gralloc_priv.h"`

			`/** Convert YV12 to YCrCb_420_SP */`
			`int convertYV12toYCrCb420SP(const copybit_image_t *src)`
			`{`
			`private_handle_t* hnd = (private_handle_t*)src->handle;`

			`if(hnd == NULL){`
			`LOGE("Invalid handle");`
			`return -1;`
			`}`

			`// Please refer to the description of YV12 in hardware.h`
			`// for the formulae used to calculate buffer sizes and offsets`

			`// In a copybit_image_t, w is the stride and`
			`// stride - horiz_padding is the actual width`
			`// vertical stride is the same as height, so not considered`
			`unsigned int stride = src->w;`
			`unsigned int width = src->w - src->horiz_padding;`
			`unsigned int height = src->h;`
			`unsigned int padding = src->horiz_padding;`
			`unsigned int y_size = stride * src->h;`
			`unsigned int c_width = ALIGN(stride/2, 16);`
			`unsigned int c_size = c_width * src->h/2;`
			`unsigned char* chroma = (unsigned char *) (hnd->base + y_size);`
			`unsigned int tempBufSize = c_size * 2;`
			`unsigned char* tempBuf = (unsigned char*) malloc (tempBufSize);`

			`if(tempBuf == NULL) {`
			`LOGE("Failed to allocate temporary buffer");`
			`return -errno;`
			`}`

			`#ifdef __ARM_HAVE_NEON`
			`/* copy into temp buffer */`

			`unsigned char * t1 = chroma;`
			`unsigned char * t2 = tempBuf;`

			`#ifdef TARGET_7x27A`
			`// Since the Sparrow core on 7x27A has a performance issue`
			`// with reading from uncached memory using Neon instructions,`
			`// use regular ARM instructions to copy the buffer on this`
			`// target. There is no issue with storing, hence using`
			`// Neon instructions for interleaving`
			`for(unsigned int i=0; i < (tempBufSize>>5); i++) {`
			`__asm__ __volatile__ (`
			`"LDMIA %0!, {r3 - r10} \n"`
			`"STMIA %1!, {r3 - r10} \n"`
			`:"+r"(t1), "+r"(t2)`
			`:`
			`:"memory","r3","r4","r5",`
			`"r6","r7","r8","r9","r10"`
			`);`

			`}`
			`#else`
			`for(unsigned int i=0; i < (tempBufSize>>5); i++) {`
			`__asm__ __volatile__ (`
			`"vld1.u8 {d0-d3}, [%0]! \n"`
			`"vst1.u8 {d0-d3}, [%1]! \n"`
			`:"+r"(t1), "+r"(t2)`
			`:`
			`:"memory","d0","d1","d2","d3"`
			`);`

			`}`
			`#endif //TARGET_7x27A`

			`/* interleave */`
			`if(!padding) {`
			`t1 = chroma;`
			`t2 = tempBuf;`
			`unsigned char * t3 = t2 + tempBufSize/2;`
			`for(unsigned int i=0; i < (tempBufSize/2)>>3; i++) {`
			`__asm__ __volatile__ (`
			`"vld1.u8 d0, [%0]! \n"`
			`"vld1.u8 d1, [%1]! \n"`
			`"vst2.u8 {d0, d1}, [%2]! \n"`
			`:"+r"(t2), "+r"(t3), "+r"(t1)`
			`:`
			`:"memory","d0","d1"`
			`);`

			`}`
			`}`
			`#else //__ARM_HAVE_NEON`
			`memcpy(tempBuf, chroma, tempBufSize);`
			`if(!padding) {`
			`for(unsigned int i = 0; i< tempBufSize/2; i++) {`
			`chroma[i*2] = tempBuf[i];`
			`chroma[i*2+1] = tempBuf[i+tempBufSize/2];`
			`}`

			`}`
			`#endif`
			`// If the image is not aligned to 16 pixels,`
			`// convert using the C routine below`
			`// r1 tracks the row of the source buffer`
			`// r2 tracks the row of the destination buffer`
			`// The width/2 checks are to avoid copying`
			`// from the padding`

			`if(padding) {`
			`unsigned int r1 = 0, r2 = 0, i = 0, j = 0;`
			`while(r1 < height/2) {`
			`if(j == width/2) {`
			`j = 0;`
			`r2++;`
			`continue;`
			`}`
			`if (j+1 == width/2) {`
			`chroma[r2*c_width + j] = tempBuf[r1*c_width+i];`
			`r2++;`
			`chroma[r2*c_width] = tempBuf[r1*c_width+i+c_size];`
			`j = 1;`
			`} else {`
			`chroma[r2*c_width + j] = tempBuf[r1*c_width+i];`
			`chroma[r2*c_width + j + 1] = tempBuf[r1*c_width+i+c_size];`
			`j+=2;`
			`}`
			`i++;`
			`if (i == width/2 ) {`
			`i = 0;`
			`r1++;`
			`}`
			`}`
			`}`

			`if(tempBuf)`
			`free(tempBuf);`
			`return 0;`
			`}`