@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevc_deblk_chroma_vert.s
@*
@* @brief
@*  contains the function definition for the chroma deblocking filter applied
@* across a vertical edge. the function is hand-written arm neon assembly in
@* gnu assembler syntax
@*
@* @author
@*  anand s
@*
@* @par list of functions:
@*  - ihevc_deblk_chroma_vert_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

@ Code section. On ARM targets GNU as treats the .align operand as a power of
@ two, so this requests 2^4 = 16-byte alignment.
.text
.align 4





@ Lookup tables defined in another object; both are indexed as arrays of
@ 32-bit words by the routine below.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_vert_a9q

@ Position-independent table addressing: each word below stores the distance
@ from (ulblN + 8) to the table. The routine loads the word and executes
@ "add rX,rX,pc" at label ulblN; in ARM state pc reads as the address of that
@ instruction plus 8, so the sum is the table's run-time address.
@ NOTE(review): the "- 8" is only valid when the routine is assembled as ARM
@ (not Thumb) code.
gai4_ihevc_qp_table_addr:
.long gai4_ihevc_qp_table - ulbl1 - 8

gai4_ihevc_tc_table_addr:
.long gai4_ihevc_tc_table  - ulbl2 - 8

@ Mark the exported symbol as a function for the linker and debug tools.
.type ihevc_deblk_chroma_vert_a9q, %function

@/**
@*******************************************************************************
@* ihevc_deblk_chroma_vert_a9q
@*
@* Filters 4 rows of byte samples across the column boundary at r0. Each row
@* is read as 8 bytes starting at r0 - 4; only the 2 bytes on either side of
@* r0 can be rewritten. Bytes at even and odd offsets are treated as two
@* independent components (presumably interleaved Cb/Cr - TODO confirm), each
@* with its own clipping threshold tc.
@*
@* Per component and per row, with p1,p0 | q0,q1 the samples around the edge:
@*     delta = clip(-tc, tc, (((q0 - p0) << 2) + p1 - q1 + 4) >> 3)
@*     p0'   = sat_u8(p0 + delta)
@*     q0'   = sat_u8(q0 - delta)
@*
@* Inputs (argument names are inferred from usage - confirm against the C
@* prototype; stack slots assume standard AAPCS argument passing):
@*     r0          pointer to the first sample right of the edge
@*     r1          row stride in bytes
@*     r2, r3      two QP values, averaged as (r2 + r3 + 1) >> 1
@*     [sp, #0]    QP offset for the even-byte component
@*     [sp, #4]    QP offset for the odd-byte component
@*     [sp, #8]    tc offset, used doubled (value << 1)
@*     [sp, #12]   non-zero: write back the p0' samples (left of the edge)
@*     [sp, #16]   non-zero: write back the q0' samples (right of the edge)
@*     (offsets above are relative to sp on entry)
@*
@* Returns nothing. Core registers r4-r12 are saved and restored (r9-r11 are
@* saved although the routine never touches them). NEON registers used:
@* d0-d7, d16-d19, d30-d31.
@*
@* The scalar and NEON instructions are interleaved on purpose; the
@* instruction order is part of the hand scheduling.
@*******************************************************************************
@*/

@ Stack-argument offsets as seen after the 10-register (40-byte) push.
.equ STK_QP_OFFSET_U,       0x28
.equ STK_QP_OFFSET_V,       0x2c
.equ STK_TC_OFFSET_DIV2,    0x30
.equ STK_FILTER_FLAG_P,     0x34
.equ STK_FILTER_FLAG_Q,     0x38

@ QP indices 0..QP_LOOKUP_MAX go through gai4_ihevc_qp_table; larger indices
@ are reduced by QP_ABOVE_LOOKUP_ADJ instead; negative ones pass through.
.equ QP_LOOKUP_MAX,         0x39
.equ QP_ABOVE_LOOKUP_ADJ,   6

@ The tc table index is (qp + tc_offset*2 + TC_INDEX_BIAS) clamped to
@ 0..TC_INDEX_MAX.
.equ TC_INDEX_BIAS,         2
.equ TC_INDEX_MAX,          0x35

ihevc_deblk_chroma_vert_a9q:
    push        {r4-r12,lr}                     @ 40 bytes: stack args now start at sp + 0x28
    sub         r8,r0,#4                        @ r8 -> 4 bytes left of the edge
    add         r2,r2,r3                        @ r2 = qp_a + qp_b
    vld1.8      {d5},[r8],r1                    @ row 0
    add         r2,r2,#1                        @ r2 = qp_a + qp_b + 1 (rounding term)
    vld1.8      {d17},[r8],r1                   @ row 1
    ldr         r7,[sp,#STK_QP_OFFSET_U]
    vld1.8      {d16},[r8],r1                   @ row 2
    ldr         r4,[sp,#STK_FILTER_FLAG_Q]
    vld1.8      {d4},[r8]                       @ row 3
    ldr         r5,[sp,#STK_TC_OFFSET_DIV2]

    @ 4x8 byte transpose, spread over three vtrn stages (.8, .16, .32).
    vtrn.8      d5,d17
    adds        r3,r7,r2,asr #1                 @ r3 = even-component QP index; N flag feeds the bmi below
    vtrn.8      d16,d4
    ldr         r7,gai4_ihevc_qp_table_addr     @ pc-relative displacement of the QP table
ulbl1:
    add         r7,r7,pc                        @ r7 = &gai4_ihevc_qp_table
    ldr         r12,[sp,#STK_FILTER_FLAG_P]
    ldr         r6,[sp,#STK_QP_OFFSET_V]
    bmi         .Ldcv_qp_even_done              @ negative index is used unchanged
    cmp         r3,#QP_LOOKUP_MAX
    ldrle       r3,[r7,r3,lsl #2]               @ 0..57: translate through the word table
    subgt       r3,r3,#QP_ABOVE_LOOKUP_ADJ      @ above 57: index - 6
.Ldcv_qp_even_done:
    vtrn.16     d5,d16
    adds        r2,r6,r2,asr #1                 @ r2 = odd-component QP index; N flag feeds the bmi below
    vtrn.16     d17,d4
    bmi         .Ldcv_qp_odd_done
    cmp         r2,#QP_LOOKUP_MAX
    ldrle       r2,[r7,r2,lsl #2]
    subgt       r2,r2,#QP_ABOVE_LOOKUP_ADJ
.Ldcv_qp_odd_done:
    @ After the .32 stage each register holds one sample position:
    @ lanes 0-3 = rows 0-3 of the even byte, lanes 4-7 = rows 0-3 of the odd byte.
    @   d5 = p1 (bytes -4,-3)   d16 = p0 (bytes -2,-1)
    @   d17 = q0 (bytes 0,+1)   d4  = q1 (bytes +2,+3)
    vtrn.32     d5,d17
    add         r3,r3,r5,lsl #1                 @ r3 = qp_even + (tc_offset << 1)
    vtrn.32     d16,d4
    add         r6,r3,#TC_INDEX_BIAS
    vmovl.u8    q9,d17                          @ q9 = q0 widened to 16 bit
    cmp         r6,#TC_INDEX_MAX
    movgt       r3,#TC_INDEX_MAX                @ clamp high
    bgt         .Ldcv_tc_idx_even_done
    adds        r6,r3,#TC_INDEX_BIAS
    addpl       r3,r3,#TC_INDEX_BIAS            @ in range: index = value + 2
    movmi       r3,#0                           @ clamp low
.Ldcv_tc_idx_even_done:
    vsubl.u8    q0,d17,d16                      @ q0 - p0
    ldr         r6,gai4_ihevc_tc_table_addr     @ pc-relative displacement of the tc table
ulbl2:
    add         r6,r6,pc                        @ r6 = &gai4_ihevc_tc_table
    vshl.i16    q0,q0,#2                        @ (q0 - p0) << 2
    add         r2,r2,r5,lsl #1                 @ r2 = qp_odd + (tc_offset << 1)
    add         r5,r2,#TC_INDEX_BIAS
    vaddw.u8    q0,q0,d5                        @ + p1
    cmp         r5,#TC_INDEX_MAX                @ flags consumed by movgt/bgt below
    ldr         r3,[r6,r3,lsl #2]               @ r3 = tc for the even component
    vsubw.u8    q2,q0,d4                        @ - q1
    movgt       r2,#TC_INDEX_MAX
    bgt         .Ldcv_tc_idx_odd_done
    adds        r5,r2,#TC_INDEX_BIAS
    addpl       r2,r2,#TC_INDEX_BIAS
    movmi       r2,#0
.Ldcv_tc_idx_odd_done:


    vrshr.s16   q3,q2,#3                        @ rounding shift: (x + 4) >> 3
    vdup.16     d2,r3                           @ q1 low half  = +tc (even component)
    ldr         r2,[r6,r2,lsl #2]               @ r2 = tc for the odd component
    rsb         r3,r3,#0
    cmp         r12,#0                          @ p-side flag; flags stay live until the beq below
    vdup.16     d3,r2                           @ q1 high half = +tc (odd component)
    rsb         r2,r2,#0
    vdup.16     d30,r3                          @ q15 low half  = -tc (even component)
    vdup.16     d31,r2                          @ q15 high half = -tc (odd component)


    vmin.s16    q2,q3,q1                        @ delta = min(delta, +tc)
    vmax.s16    q1,q15,q2                       @ delta = max(delta, -tc)

    vmovl.u8    q3,d16                          @ q3 = p0 widened to 16 bit

    vadd.i16    q0,q3,q1                        @ p0 + delta
    vsub.i16    q1,q9,q1                        @ q0 - delta
    vqmovun.s16 d0,q0                           @ saturate to u8: d0 = p0'
    sub         r2,r0,#2                        @ r2 -> p0 pair of row 0
    vqmovun.s16 d1,q1                           @ saturate to u8: d1 = q0'
    @ Re-interleave so every 16-bit lane is one row's (even,odd) byte pair:
    @ rows 0,2 end up in d0, rows 1,3 in d1; lanes 0-1 = p0', lanes 2-3 = q0'.
    vtrn.32     d0,d1
    vtrn.8      d0,d1
    beq         .Ldcv_skip_p_store              @ p-side flag == 0: leave the left samples untouched

    vst1.16     {d0[0]},[r2],r1                 @ row 0
    vst1.16     {d1[0]},[r2],r1                 @ row 1
    vst1.16     {d0[1]},[r2],r1                 @ row 2
    vst1.16     {d1[1]},[r2]                    @ row 3
.Ldcv_skip_p_store:
    cmp         r4,#0
    beq         .Ldcv_exit                      @ q-side flag == 0: leave the right samples untouched
    vst1.16     {d0[2]},[r0],r1                 @ row 0
    vst1.16     {d1[2]},[r0],r1                 @ row 1
    vst1.16     {d0[3]},[r0],r1                 @ row 2
    vst1.16     {d1[3]},[r0]                    @ row 3
.Ldcv_exit:
    pop         {r4-r12,pc}