/****************************************************************************** * * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ /** ******************************************************************************* * @file * ih264e_process.c * * @brief * Contains functions for codec thread * * @author * Harish * * @par List of Functions: * - ih264e_generate_sps_pps() * - ih264e_init_entropy_ctxt() * - ih264e_entropy() * - ih264e_pack_header_data() * - ih264e_update_proc_ctxt() * - ih264e_init_proc_ctxt() * - ih264e_pad_recon_buffer() * - ih264e_dblk_pad_hpel_processing_n_mbs() * - ih264e_process() * - ih264e_set_rc_pic_params() * - ih264e_update_rc_post_enc() * - ih264e_process_thread() * * @remarks * None * ******************************************************************************* */ /*****************************************************************************/ /* File Includes */ /*****************************************************************************/ /* System include files */ #include <stdio.h> #include <stddef.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <assert.h> /* User include files */ #include "ih264_typedefs.h" #include "iv2.h" #include "ive2.h" #include "ih264_defs.h" #include "ih264_debug.h" #include 
"ime_distortion_metrics.h" #include "ime_defs.h" #include "ime_structs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" #include "ih264_cabac_tables.h" #include "ih264_platform_macros.h" #include "ih264_macros.h" #include "ih264_buf_mgr.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ih264_common_tables.h" #include "ih264_list.h" #include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" #include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_cabac.h" #include "ih264e_process.h" #include "ithread.h" #include "ih264e_intra_modes_eval.h" #include "ih264e_encode_header.h" #include "ih264e_globals.h" #include "ih264e_config.h" #include "ih264e_trace.h" #include "ih264e_statistics.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" #include "ih264e_deblk.h" #include "ih264e_me.h" #include "ih264e_debug.h" #include "ih264e_master.h" #include "ih264e_utils.h" #include "irc_mem_req_and_acq.h" #include "irc_rate_control_api.h" #include "ih264e_platform_macros.h" #include "ime_statistics.h" /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ /** ****************************************************************************** * * @brief This function generates sps, pps set on request * * @par Description * When the encoder is set in header generation mode, the following function * is called. This generates sps and pps headers and returns the control back * to caller. 
* * @param[in] ps_codec * pointer to codec context * * @return success or failure error code * ****************************************************************************** */ IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) { /* choose between ping-pong process buffer set */ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; /* entropy ctxt */ entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; /* Bitstream structure */ bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; /* sps */ sps_t *ps_sps = NULL; /* pps */ pps_t *ps_pps = NULL; /* output buff */ out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; /********************************************************************/ /* initialize the bit stream buffer */ /********************************************************************/ ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); /********************************************************************/ /* BEGIN HEADER GENERATION */ /********************************************************************/ /*ps_codec->i4_pps_id ++;*/ ps_codec->i4_pps_id %= MAX_PPS_CNT; /*ps_codec->i4_sps_id ++;*/ ps_codec->i4_sps_id %= MAX_SPS_CNT; /* populate sps header */ ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; ih264e_populate_sps(ps_codec, ps_sps); /* populate pps header */ ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; ih264e_populate_pps(ps_codec, ps_pps); ps_entropy->i4_error_code = IH264E_SUCCESS; /* generate sps */ ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_cfg.s_vui); /* generate pps */ ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); /* queue output buffer */ ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; return ps_entropy->i4_error_code; } /** ******************************************************************************* * * @brief initialize entropy 
context. * * @par Description: * Before invoking the call to perform to entropy coding the entropy context * associated with the job needs to be initialized. This involves the start * mb address, end mb address, slice index and the pointer to location at * which the mb residue info and mb header info are packed. * * @param[in] ps_proc * Pointer to the current process context * * @returns error status * * @remarks none * ******************************************************************************* */ IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) { /* codec context */ codec_t *ps_codec = ps_proc->ps_codec; /* entropy ctxt */ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; /* start address */ ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; /* end address */ ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; /* slice index */ ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; /* sof */ /* @ start of frame or start of a new slice, set sof flag */ if (ps_entropy->i4_mb_start_add == 0) { ps_entropy->i4_sof = 1; } if (ps_entropy->i4_mb_x == 0) { /* packed mb coeff data */ ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; /* packed mb header data */ ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; } return IH264E_SUCCESS; } /** ******************************************************************************* * * @brief entry point for entropy coding * * @par Description * This function calls lower level functions to perform entropy coding for a * group (n rows) of mb's. After encoding 1 row of mb's, the function takes * back the control, updates the ctxt and calls lower level functions again. 
* This process is repeated till all the rows or group of mb's (which ever is
* minimum) are coded
*
* @param[in] ps_proc
* process context
*
* @returns error status
*
* @remarks
*
*******************************************************************************
*/
IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
{
    /*
     * Overview of the body:
     *  1. when i4_sof is set: init the bitstream, optionally emit sps/pps,
     *     emit the slice header and (CABAC) init the cabac context
     *  2. for every mb in [i4_mb_start_add, i4_mb_end_add): wait for the mb to
     *     be marked in the proc map, write its syntax, mark the entropy map and
     *     handle row / slice boundaries
     *  3. when the last mb of the frame has been written: close the slice,
     *     update rate control, and publish the byte count in the output buffer
     * The accumulated ps_entropy->i4_error_code is the return value.
     */

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* entropy context */
    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;

    /* cabac context */
    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;

    /* sps (id wrapped to the sps table size) */
    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);

    /* pps (id wrapped to the pps table size) */
    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);

    /* slice header of the current slice (index wrapped to the table size) */
    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);

    /* slice type */
    WORD32 i4_slice_type = ps_proc->i4_slice_type;

    /* Bitstream structure */
    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;

    /* output buff */
    /* NOTE(review): this is a by-value copy; see the sof branch below */
    out_buf_t s_out_buf;

    /* proc map: row of flags polled below before an mb is entropy coded */
    UWORD8 *pu1_proc_map;

    /* entropy map: row of flags set below once an mb has been entropy coded */
    UWORD8 *pu1_entropy_map_curr;

    /* proc base idx: selects the ping-pong context set of this api call */
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;

    /* temp var */
    WORD32 i4_wd_mbs, i4_ht_mbs;
    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
    WORD32 bitstream_start_offset, bitstream_end_offset;

    /********************************************************************/
    /*                            BEGIN INIT                            */
    /********************************************************************/

    /* entropy encode start address */
    u4_mb_idx = ps_entropy->i4_mb_start_add;

    /* entropy encode end address */
    u4_mb_end_idx = ps_entropy->i4_mb_end_add;

    /* width in mbs */
    i4_wd_mbs = ps_entropy->i4_wd_mbs;

    /* height in mbs */
    i4_ht_mbs = ps_entropy->i4_ht_mbs;

    /* total mb cnt */
    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;

    /* proc map, offset to the row of the first mb */
    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;

    /* entropy map, offset to the row of the first mb */
    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;

    /********************************************************************/
    /* @ start of frame / slice,                                        */
    /*      initialize the output buffer,                               */
    /*      initialize the bit stream buffer,                           */
    /*      check if sps and pps headers have to be generated,          */
    /*      populate and generate slice header                          */
    /********************************************************************/
    if (ps_entropy->i4_sof)
    {
        /********************************************************************/
        /*                   initialize the output buffer                   */
        /********************************************************************/
        /*
         * NOTE(review): s_out_buf is a local copy of as_out_buf[ctxt_sel]. The
         * u4_is_last / timestamp stores below land on the copy and are not
         * written back in this function; only s_bits_buf is consumed here, by
         * ih264e_bitstrm_init().
         */
        s_out_buf = ps_codec->as_out_buf[ctxt_sel];

        /* is last frame to encode */
        s_out_buf.u4_is_last = ps_entropy->u4_is_last;

        /* frame idx */
        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;

        /********************************************************************/
        /*                 initialize the bit stream buffer                 */
        /********************************************************************/
        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);

        /********************************************************************/
        /*                     BEGIN HEADER GENERATION                      */
        /********************************************************************/
        if (1 == ps_entropy->i4_gen_header)
        {
            /* generate sps */
            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_cfg.s_vui);

            /* generate pps */
            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);

            /* reset i4_gen_header so the headers are emitted only once per request */
            ps_entropy->i4_gen_header = 0;
        }

        /* populate slice header */
        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);

        /* generate slice header */
        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);

        /* once start of frame / slice is done, you can reset it */
        /* it is the responsibility of the caller to set this flag */
        ps_entropy->i4_sof = 0;

        /* CABAC: byte align and flush the stream, then (re)initialise the cabac context */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
            BITSTREAM_FLUSH(ps_bitstrm);
            ih264e_init_cabac_ctxt(ps_entropy);
        }
    }

    /* begin entropy coding for the mb set */
    while (u4_mb_idx < u4_mb_end_idx)
    {
        /* init ptrs/indices */
        /* the previous iteration ran off the end of a row: move to the next row */
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
        {
            ps_entropy->i4_mb_y++;
            ps_entropy->i4_mb_x = 0;

            /* packed mb coeff data */
            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;

            /* packed mb header data */
            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;

            /* proc map */
            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;

            /* entropy map */
            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
        }

        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);

        /* wait until the curr mb is core coded */
        /* The wait for curr mb to be core coded is essential when entropy is launched
         * as a separate job */
        /* polls the proc map flag of this mb through a volatile pointer and
         * yields the thread between polls until the flag is non-zero */
        while (1)
        {
            volatile UWORD8 *pu1_buf1;
            WORD32 idx = ps_entropy->i4_mb_x;

            pu1_buf1 = pu1_proc_map + idx;
            if (*pu1_buf1) break;
            ithread_yield();
        }

        /* write mb layer: writer is selected by entropy coding mode and slice type */
        ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);

        /* Starting bitstream offset for header in bits */
        /* sampled AFTER the mb layer is written, so the difference taken at the
         * bottom of the loop only spans what is emitted after the mb syntax in
         * this iteration (presumably a bit position - confirm GET_NUM_BITS) */
        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);

        /* set entropy map */
        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;

        u4_mb_idx++;
        ps_entropy->i4_mb_x++;

        /* check for eof */
        /* CABAC, mb is not the last of its row: terminate with flag 0 */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            if (ps_entropy->i4_mb_x < i4_wd_mbs)
            {
                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
            }
        }

        /* end of row reached */
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
        {
            /* if slices are enabled */
            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
            {
                /* current slice index */
                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;

                /* slice map */
                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;

                /* No need to open a slice at end of frame. The current slice can be closed at the time
                 * of signaling eof flag. */
                /* the u4_mb_idx != u4_mb_cnt test also keeps the slice map lookup
                 * from being made for the index one past the last mb */
                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
                {
                    /* close the slice that just ended */
                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
                    {
                        /* mb skip run */
                        /* the inner test repeats the second half of the outer condition */
                        if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
                        {
                            if (*ps_entropy->pi4_mb_skip_run)
                            {
                                PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
                                *ps_entropy->pi4_mb_skip_run = 0;
                            }
                        }

                        /* put rbsp trailing bits for the previous slice */
                        ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
                    }
                    else
                    {
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
                    }

                    /* update slice header pointer */
                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);

                    /* populate slice header */
                    /* the new slice starts at the next mb to be coded */
                    ps_entropy->i4_mb_start_add = u4_mb_idx;
                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);

                    /* generate slice header */
                    ps_entropy->i4_error_code |= ih264e_generate_slice_header( ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);

                    /* CABAC: same align / flush / init sequence as at start of frame */
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
                    {
                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
                        BITSTREAM_FLUSH(ps_bitstrm);
                        ih264e_init_cabac_ctxt(ps_entropy);
                    }
                }
                else
                {
                    /* slice continues into the next row: terminate with flag 0,
                     * except at end of frame */
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag && u4_mb_idx != u4_mb_cnt)
                    {
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
                    }
                }
            }

            /* Dont execute any further instructions until store synchronization took place */
            DATA_SYNC();
        }

        /* Ending bitstream offset for header in bits */
        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);

        /* index 1 accumulates for P slices, index 0 for every other slice type */
        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += bitstream_end_offset - bitstream_start_offset;
    }

    /* check for eof */
    if (u4_mb_idx == u4_mb_cnt)
    {
        /* set end of frame flag */
        ps_entropy->i4_eof = 1;
    }
    else
    {
        /* job ended before the frame did; when slice mode is not blocks the
         * row-end terminate was not issued inside the loop, so issue it here */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag && ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_BLOCKS)
        {
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
        }
    }

    if (ps_entropy->i4_eof)
    {
        /* close the last slice of the frame */
        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            /* mb skip run */
            /* the inner test repeats the second half of the outer condition */
            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
            {
                if (*ps_entropy->pi4_mb_skip_run)
                {
                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
                    *ps_entropy->pi4_mb_skip_run = 0;
                }
            }

            /* put rbsp trailing bits */
            ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
        }
        else
        {
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
        }

        /* update current frame stats to rc library */
        {
            /* number of bytes to stuff */
            WORD32 i4_stuff_bytes;

            /* update; the last argument is non-zero when the codec poc is 0 */
            i4_stuff_bytes = ih264e_update_rc_post_enc( ps_codec, ctxt_sel, (ps_proc->ps_codec->i4_poc == 0));

            /* cbr rc - house keeping */
            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
            {
                /* rewind the stream so that 0 bytes are reported for this frame below */
                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
            }
            else if (i4_stuff_bytes)
            {
                /* add filler nal units */
                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
            }
        }

        /*
         * Frame number is to be incremented only if the current frame is a
         * reference frame. After each successful frame encode, we increment
         * frame number by 1
         */
        if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] && ps_codec->u4_is_curr_frm_ref)
        {
            ps_codec->i4_frame_num++;
        }

        /********************************************************************/
        /*                        signal the output                         */
        /********************************************************************/
        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;

        DEBUG("entropy status %x", ps_entropy->i4_error_code);
    }

    /* allow threads to dequeue entropy jobs */
    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;

    return ps_entropy->i4_error_code;
}

/**
*******************************************************************************
*
* @brief Packs header information of a mb in to a buffer
*
* @par Description:
* After the deciding the mode info of a macroblock, the syntax elements
* associated with the mb are packed and stored. The entropy thread unpacks
* this buffer and generates the end bit stream.
* * @param[in] ps_proc * Pointer to the current process context * * @returns error status * * @remarks none * ******************************************************************************* */ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) { /* curr mb type */ UWORD32 u4_mb_type = ps_proc->u4_mb_type; /* pack mb syntax layer of curr mb (used for entropy coding) */ if (u4_mb_type == I4x4) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data; /* temp var */ WORD32 i4, byte; /* mb type plus mode */ ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; /* cbp */ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; /* mb qp delta */ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; /* sub mb modes */ for (i4 = 0; i4 < 16; i4 ++) { byte = 0; if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == ps_proc->au1_intra_luma_mb_4x4_modes[i4]) { byte |= 1; } else { if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) { byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); } else { byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; } } i4++; if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == ps_proc->au1_intra_luma_mb_4x4_modes[i4]) { byte |= 16; } else { if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) { byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); } else { byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; } } ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte; } /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_i4x4_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if (u4_mb_type == I16x16) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data; /* mb type plus mode */ 
ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; /* cbp */ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; /* mb qp delta */ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_i16x16_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if (u4_mb_type == P16x16) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data; /* mb type */ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; /* cbp */ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; /* mb qp delta */ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_p16x16_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if (u4_mb_type == PSKIP) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data; /* mb type */ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_pskip_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if(u4_mb_type == B16x16) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data; UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; /* mb type plus mode */ ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type; /* cbp */ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; /* mb qp delta */ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; /* l0 & l1 me data */ if (u4_pred_mode != PRED_L1) { ps_mb_hdr->ai2_mv[0][0] = 
ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; } if (u4_pred_mode != PRED_L0) { ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; } /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_b16x16_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if(u4_mb_type == BDIRECT) { /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data; /* mb type plus mode */ ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; /* cbp */ ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; /* mb qp delta */ ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_bdirect_t); ps_proc->pv_mb_header_data = pu1_ptr; } else if(u4_mb_type == BSKIP) { UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; /* pointer to mb header storage space */ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data; /* mb type plus mode */ ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type; /* end of mb layer */ pu1_ptr += sizeof(mb_hdr_bskip_t); ps_proc->pv_mb_header_data = pu1_ptr; } return IH264E_SUCCESS; } /** ******************************************************************************* * * @brief update process context after encoding an mb. This involves preserving * the current mb information for later use, initialize the proc ctxt elements to * encode next mb. * * @par Description: * This function performs house keeping tasks after encoding an mb. * After encoding an mb, various elements of the process context needs to be * updated to encode the next mb. 
For instance, the source, recon and reference * pointers, mb indices have to be adjusted to the next mb. The slice index of * the current mb needs to be updated. If mb qp modulation is enabled, then if * the qp changes the quant param structure needs to be updated. Also to encoding * the next mb, the current mb info is used as part of mode prediction or mv * prediction. Hence the current mb info has to preserved at top/top left/left * locations. * * @param[in] ps_proc * Pointer to the current process context * * @returns none * * @remarks none * ******************************************************************************* */ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) { /* error status */ WORD32 error_status = IH264_SUCCESS; /* codec context */ codec_t *ps_codec = ps_proc->ps_codec; /* curr mb indices */ WORD32 i4_mb_x = ps_proc->i4_mb_x; WORD32 i4_mb_y = ps_proc->i4_mb_y; /* mb syntax elements of neighbors */ mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; /* curr mb type */ UWORD32 u4_mb_type = ps_proc->u4_mb_type; /* curr mb type */ UWORD32 u4_is_intra = ps_proc->u4_is_intra; /* width in mbs */ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; /*height in mbs*/ WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; /* proc map */ UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); /* deblk context */ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; /* deblk bs context */ bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); /* top row motion vector info */ enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; /* top left mb motion vector */ enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; /* left mb motion vector */ enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; /* sub mb modes */ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 
/*************************************************************/ /* During MV prediction, when top right mb is not available, */ /* top left mb info. is used for prediction. Hence the curr */ /* top, which will be top left for the next mb needs to be */ /* preserved before updating it with curr mb info. */ /*************************************************************/ /* mb type, mb class, csbp */ *ps_top_left_syn = *ps_top_syn; if (ps_proc->i4_slice_type != ISLICE) { /*****************************************/ /* update top left with top info results */ /*****************************************/ /* mv */ *ps_top_left_mb_pu = *ps_top_row_pu; } /*************************************************/ /* update top and left with curr mb info results */ /*************************************************/ /* mb type */ ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; /* mb class */ ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; /* csbp */ ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; /* distortion */ ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; if (u4_is_intra) { /* mb / sub mb modes */ if (I16x16 == u4_mb_type) { pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; } else if (I4x4 == u4_mb_type) { ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); } else if (I8x8 == u4_mb_type) { memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); } if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) { /* mv */ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); } *ps_proc->pu4_mb_pu_cnt = 1; } else { /* mv */ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); } /* * Mark that the MB has 
been coded intra * So that future AIRs can skip it */ ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; /**************************************************/ /* pack mb header info. for entropy coding */ /**************************************************/ ih264e_pack_header_data(ps_proc); /* update previous mb qp */ ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; /* store qp */ ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; /* * We need to sync the cache to make sure that the nmv content of proc * is updated to cache properly */ DATA_SYNC(); /* Just before finishing the row, enqueue the job in to entropy queue. * The master thread depending on its convenience shall dequeue it and * performs entropy. * * WARN !! Placing this block post proc map update can cause queuing of * entropy jobs in out of order. */ if (i4_mb_x == i4_wd_mbs - 1) { /* job structures */ job_t s_job; /* job class */ s_job.i4_cmd = CMD_ENTROPY; /* number of mbs to be processed in the current job */ s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; /* job start index x */ s_job.i2_mb_x = 0; /* job start index y */ s_job.i2_mb_y = ps_proc->i4_mb_y; /* proc base idx */ s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? 
(MAX_PROCESS_CTXT / 2) : 0; /* queue the job */ error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) ih264_list_terminate(ps_codec->pv_entropy_jobq); } /* update proc map */ pu1_proc_map[i4_mb_x] = 1; /**************************************************/ /* update proc ctxt elements for encoding next mb */ /**************************************************/ /* update indices */ i4_mb_x ++; ps_proc->i4_mb_x = i4_mb_x; if (ps_proc->i4_mb_x == i4_wd_mbs) { ps_proc->i4_mb_y++; ps_proc->i4_mb_x = 0; } /* update slice index */ ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; /* update buffers pointers */ ps_proc->pu1_src_buf_luma += MB_SIZE; ps_proc->pu1_rec_buf_luma += MB_SIZE; ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; /* * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, * the stride per MB is MB_SIZE */ ps_proc->pu1_src_buf_chroma += MB_SIZE; ps_proc->pu1_rec_buf_chroma += MB_SIZE; ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; /* Reset cost, distortion params */ ps_proc->i4_mb_cost = INT_MAX; ps_proc->i4_mb_distortion = SHRT_MAX; ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; ps_proc->pu4_mb_pu_cnt += 1; /* Update colocated pu */ if (ps_proc->i4_slice_type == BSLICE) ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); /* deblk ctxts */ if (ps_proc->u4_disable_deblock_level != 1) { /* indices */ ps_bs->i4_mb_x = ps_proc->i4_mb_x; ps_bs->i4_mb_y = ps_proc->i4_mb_y; #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */ ps_deblk->i4_mb_x ++; ps_deblk->pu1_cur_pic_luma += MB_SIZE; /* * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, * the stride per MB is MB_SIZE */ ps_deblk->pu1_cur_pic_chroma += MB_SIZE; #endif } return error_status; } /** 
*******************************************************************************
*
* @brief initialize process context.
*
* @par Description:
* Before dispatching the current job to process thread, the process context
* associated with the job is initialized. Usually every job aims to encode one
* row of mb's. Basing on the row indices provided by the job, the process
* context's buffer ptrs, slice indices and other elements that are necessary
* during core-coding are initialized.
*
* @param[in] ps_proc
* Pointer to the current process context
*
* @returns error status
*
* @remarks none
*
*******************************************************************************
*/
/*
 * Additional notes on ih264e_init_proc_ctxt():
 *  - The starting mb position is read from ps_proc->i4_mb_x / i4_mb_y; all
 *    source, recon and reference pointers are derived from it.
 *  - Besides pointer set-up, the function also copies / converts the input
 *    rows of the current mb row into the codec's intermediate buffers and pads
 *    them on the right / bottom when the coded size (u4_wd x u4_ht) exceeds the
 *    display size (u4_disp_wd x u4_disp_ht).
 *  - The function always returns IH264E_SUCCESS.
 */
IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
{
    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* nmb processing context*/
    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;

    /* indices */
    WORD32 i4_mb_x, i4_mb_y;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
    WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* quant params */
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];

    /* deblk ctxt */
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;

    /* deblk bs context */
    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);

    /* Pointer to mv_buffer of current frame */
    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;

    /* Pointers for color space conversion */
    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;

    /* Pad the MB to support non standard sizes: */
    /* difference between the coded size and the display size */
    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;

    /* number of input rows handed to the color conversion routines */
    UWORD16 u2_num_rows = MB_SIZE;

    /* last argument of pf_ih264e_conv_420p_to_420sp(); cleared below when */
    /* the luma source is redirected to the intermediate buffer            */
    WORD32 convert_uv_only;

    /********************************************************************/
    /*                            BEGIN INIT                            */
    /********************************************************************/

    i4_mb_x = ps_proc->i4_mb_x;
    i4_mb_y = ps_proc->i4_mb_y;

    /* Number of mbs processed in one loop of process function */
    /* (both entropy and ME are set to work on a full row of mbs) */
    ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
    ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;

    /* init buffer pointers */
    convert_uv_only = 1;
    if (u4_pad_bottom_sz || u4_pad_right_sz ||
                    ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
    {
        /* Padding is needed or the input is 422ILE: luma is read from the */
        /* codec's intermediate luma buffer, whose stride is u4_max_wd     */
        if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
            u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
        ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
        i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
        ps_proc->pu1_src_buf_luma =
                        ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE)
                        + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
        convert_uv_only = 0;
    }
    else
    {
        /* Luma is used in place; take the stride of the raw input buffer */
        i4_src_strd = ps_proc->i4_src_strd =
                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
        ps_proc->pu1_src_buf_luma =
                        ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE)
                        + i4_src_strd * (i4_mb_y * MB_SIZE);
    }

    /* NOTE(review): the chroma x offset below is (i4_mb_x * MB_SIZE); this is */
    /* presumably because U and V are interleaved in the working buffer -      */
    /* confirm against the buffer layout                                       */
    if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
                    ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
                    ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
                    u4_pad_bottom_sz || u4_pad_right_sz)
    {
        /* For semi-planar input the chroma base is switched to the codec's */
        /* intermediate chroma buffer; for the other formats the base       */
        /* pointer already held in the context is used as is                */
        if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
                        (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
            ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;

        ps_proc->pu1_src_buf_chroma =
                        ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE)
                        + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd =
                        ps_codec->s_cfg.u4_max_wd;
    }
    else
    {
        i4_src_chroma_strd = ps_proc->i4_src_chroma_strd =
                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
        ps_proc->pu1_src_buf_chroma =
                        ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE)
                        + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
    }

    /* recon pointers; luma and chroma both use i4_rec_strd */
    ps_proc->pu1_rec_buf_luma =
                    ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * MB_SIZE);
    ps_proc->pu1_rec_buf_chroma =
                    ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);

    /* Temporal back and forward reference buffer */
    ps_proc->apu1_ref_buf_luma[0] =
                    ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * MB_SIZE);
    ps_proc->apu1_ref_buf_chroma[0] =
                    ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
    ps_proc->apu1_ref_buf_luma[1] =
                    ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * MB_SIZE);
    ps_proc->apu1_ref_buf_chroma[1] =
                    ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE)
                    + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);

    /*
     * Do color space conversion
     * NOTE : We assume here that the number of MB's to process will not span multiple rows
     */
    switch (ps_codec->s_cfg.e_inp_color_fmt)
    {
        case IV_YUV_420SP_UV:
        case IV_YUV_420SP_VU:
            /* In case of 420 semi-planar input, copy last few rows to intermediate
               buffer as chroma trans functions access one extra byte due to interleaved input.
               This data will be padded if required */
            if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
            {
                WORD32 num_rows = MB_SIZE;
                UWORD8 *pu1_src;
                UWORD8 *pu1_dst;
                WORD32 i;
                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
                pu1_dst = ps_proc->pu1_src_buf_luma;

                /* If padding is required, we always copy luma,
                   if padding isn't required we never copy luma. */
                if (u4_pad_bottom_sz || u4_pad_right_sz)
                {
                    /* the last mb row only has (MB_SIZE - pad) input rows */
                    if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
                        num_rows = MB_SIZE - u4_pad_bottom_sz;
                    /* each row copy is u4_wd bytes long */
                    for (i = 0; i < num_rows; i++)
                    {
                        memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
                        pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
                        pu1_dst += ps_proc->i4_src_strd;
                    }
                }
                pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
                          ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
                pu1_dst = ps_proc->pu1_src_buf_chroma;

                /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
                 * due to interleaved input */
                if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
                    num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
                else
                    num_rows = BLK8x8SIZE;
                for (i = 0; i < num_rows; i++)
                {
                    memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
                    pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
                    pu1_dst += ps_proc->i4_src_chroma_strd;
                }
            }
            break;

        case IV_YUV_420P :
            /* planar input: locate the current mb row in each of the three */
            /* input planes and convert it into the working buffers         */
            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);

            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);

            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);

            ps_codec->pf_ih264e_conv_420p_to_420sp(
                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
                            ps_proc->pu1_src_buf_luma,
                            ps_proc->pu1_src_buf_chroma, u2_num_rows,
                            ps_codec->s_cfg.u4_disp_wd,
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
                            convert_uv_only);
            break;

        case IV_YUV_422ILE :
            /* the x offset into the 422ILE input is (i4_mb_x * MB_SIZE * 2) */
            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) +
                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);

            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
                            ps_proc->pu1_src_buf_luma,
                            ps_proc->pu1_src_buf_chroma,
                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
                            ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
                            ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
                            ps_proc->i4_src_chroma_strd,
                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
            break;

        default:
            break;
    }

    /* pad right edge (done once per row, when the row starts at mb 0) */
    if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
    {
        UWORD32 u4_pad_wd, u4_pad_ht;

        /* pad width is limited to the space left between the display */
        /* width and the source stride                                */
        u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
        u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
        u4_pad_ht = MB_SIZE;
        if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
            u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));

        ih264_pad_right_luma(
                        ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
                        ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);

        ih264_pad_right_chroma(
                        ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
                        ps_proc->i4_src_chroma_strd, u4_pad_ht / 2,
                        u4_pad_wd);
    }

    /* pad bottom edge (last mb row only, when the row starts at mb 0) */
    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
    {
        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
                         ps_proc->i4_src_strd, ps_proc->i4_src_strd,
                         u4_pad_bottom_sz);

        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
                         ps_proc->i4_src_chroma_strd,
                         ps_proc->i4_src_chroma_strd,
                         (u4_pad_bottom_sz / 2));
    }

    /* packed mb coeff data (one slot of u4_size_coeff_data bytes per mb row) */
    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) +
                    i4_mb_y * ps_codec->u4_size_coeff_data;

    /* packed mb header data (one slot of u4_size_header_data bytes per mb row) */
    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) +
                    i4_mb_y * ps_codec->u4_size_header_data;

    /* slice index */
    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];

    /*********************************************************************/
    /* ih264e_init_quant_params() routine is called at the pic init level*/
    /* this would have initialized the qp.                               */
    /* TODO_LATER: currently it is assumed that quant params do not      */
    /* change across mb's. When they do calculate update ps_qp_params    */
    /* accordingly                                                       */
    /*********************************************************************/

    /* init mv buffer ptr */
    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));

    /* Init co-located mv buffer */
    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
                        ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));

    /* top row pu pointer for ME: row 0 points at its own row, every */
    /* other row points at the row above                             */
    if (i4_mb_y == 0)
    {
        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
    }
    else
    {
        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
                                    ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
    }

    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);

    /* mb type */
    ps_proc->u4_mb_type = I16x16;

    /* lambda */
    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];

    /* mb distortion */
    ps_proc->i4_mb_distortion = SHRT_MAX;

    if (i4_mb_x == 0)
    {
        /* start of a row: reset the distortion of the left / top-left neighbours */
        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;

        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;

        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;

        if (i4_mb_y == 0)
        {
            /* first mb of the picture: clear the whole top row syntax store */
            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
        }
    }

    /* mb cost */
    ps_proc->i4_mb_cost = INT_MAX;

    /**********************/
    /* init deblk context */
    /**********************/
    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
    /* deblk lags the current mb proc by 1 row */
    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
    /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is encoded. */
    /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;

    /* buffer ptrs (start of the row tracked by the deblock context) */
    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);

    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);

    /* init deblk bs context */
    /* mb indices */
    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
    ps_bs->i4_mb_y = ps_proc->i4_mb_y;

    /* init n_mb_process context */
    ps_n_mb_ctxt->i4_mb_x = 0;
    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;

    return IH264E_SUCCESS;
}

/**
*******************************************************************************
*
* @brief This function performs luma & chroma padding of the recon picture
*  around one macroblock
*
* @par Description:
*  Depending on the position of the mb, the codec's padding function pointers
*  are invoked as follows:
*  - i4_mb_x == 0 : left edge is padded by PAD_LEFT
*  - i4_mb_x == last column : right edge is padded by PAD_RIGHT; if the mb is
*    also in the last mb row, pf_pad_bottom is invoked for PAD_BOT luma rows
*    and (PAD_BOT >> 1) chroma rows
*  - i4_mb_y == 0 : top edge is padded by PAD_TOP luma rows and (PAD_TOP >> 1)
*    chroma rows; the padded width is widened by PAD_LEFT for the first column
*    and by PAD_RIGHT for the last column
*  Chroma calls use (i4_pad_ht >> 1) rows and the same stride as luma
*  (ps_proc->i4_rec_strd).
*
* @param[in] ps_proc
*  Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
*  Pointer to luma buffer
*
* @param[in] pu1_curr_pic_chroma
*  Pointer to chroma buffer
*
* @param[in] i4_mb_x
*  mb index x
*
* @param[in] i4_mb_y
*  mb index y
*
* @param[in] i4_pad_ht
*  number of rows to be padded
*
* @returns error status (always IH264E_SUCCESS)
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
                                       UWORD8 *pu1_curr_pic_luma,
                                       UWORD8 *pu1_curr_pic_chroma,
                                       WORD32 i4_mb_x,
                                       WORD32 i4_mb_y,
                                       WORD32 i4_pad_ht)
{
    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* strides */
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    if (i4_mb_x == 0)
    {
        /* padding left luma */
        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);

        /* padding left chroma */
        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
    }
    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
    {
        /* padding right luma */
        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);

        /* padding right chroma */
        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);

        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
        {
            /* pointers handed to pf_pad_bottom: the last of the i4_pad_ht */
            /* rows, offset by (MB_SIZE + PAD_RIGHT) from the mb start     */
            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);

            /* padding bottom luma */
            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);

            /* padding bottom chroma */
            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
        }
    }

    if (i4_mb_y == 0)
    {
        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;

        /* width of the strip that is replicated upwards */
        WORD32 wd = MB_SIZE;

        if (i4_mb_x == 0)
        {
            /* include the left padding region in the strip */
            pu1_rec_luma -= PAD_LEFT;
            pu1_rec_chroma -= PAD_LEFT;

            wd += PAD_LEFT;
        }
        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
        {
            /* include the right padding region in the strip */
            wd += PAD_RIGHT;
        }

        /* padding top luma */
        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);

        /* padding top chroma */
        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
    }

    return IH264E_SUCCESS;
}

/**
*******************************************************************************
*
* @brief This function performs deblocking and padding for 'n' MBs
*
* @par Description:
*  - When deblocking is disabled (u4_disable_deblock_level == 1), the left and
*    right edges of the current mb are padded directly.
*  - For mb rows other than row 0, the function returns without doing any
*    further work until i4_n_mbs mbs have accumulated since the last call that
*    did work, or the last column of the row is reached.
*  - When deblocking is enabled, the accumulated mbs of the row tracked by the
*    deblock context are deblocked, provided the deblock map of the row above
*    (including the top right mb) is set; otherwise the function returns and
*    leaves the deblock context untouched.
*  - While mb row 2 is being processed, the top of the picture is padded.
*  - At the last column of the last mb row, the last row itself is deblocked
*    (when deblocking is enabled), the left / right edges of the last two mb
*    rows are padded and the bottom of the picture is padded; the top is
*    padded here as well when the picture has at most 2 mb rows.
*  NOTE: despite the name, no half-pel generation is performed in this
*  function body.
*
* @param[in] ps_proc
*  Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
*  Current MB being processed(Luma)
*
* @param[in] pu1_curr_pic_chroma
*  Current MB being processed(Chroma)
*
* @param[in] i4_mb_x
*  Column value of current MB processed
*
* @param[in] i4_mb_y
*  Current row processed
*
* @returns error status (always IH264E_SUCCESS)
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
                                                     UWORD8 *pu1_curr_pic_luma,
                                                     UWORD8 *pu1_curr_pic_chroma,
                                                     WORD32 i4_mb_x,
                                                     WORD32 i4_mb_y)
{
    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* n_mb processing context */
    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;

    /* deblk context */
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;

    /* strides */
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* loop variables */
    WORD32 row, i, j, col;

    /* Padding Width */
    UWORD32 u4_pad_wd;

    /* deblk_map of the row being deblocked */
    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;

    /* deblk_map_previous row */
    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;

    /* x offset (may be negative) from the recon base for top padding */
    WORD32 u4_pad_top = 0;

    /* set to 1 only if every needed mb of the row above is deblocked */
    WORD32 u4_deblk_prev_row = 0;

    /* Number of mbs to be processed */
    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;

    /* Number of mbs actually processed
     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
    WORD32 i4_n_mb_process_count = 0;

    UWORD8 *pu1_pad_bottom_src = NULL;

    UWORD8 *pu1_pad_src_luma = NULL;
    UWORD8 *pu1_pad_src_chroma = NULL;

    /* deblocking disabled: pad the left / right edges of the current mb now */
    if (ps_proc->u4_disable_deblock_level == 1)
    {
        /* If left most MB is processed, then pad left */
        if (i4_mb_x == 0)
        {
            /* padding left luma */
            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

            /* padding left chroma */
            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
        }
        /*last col*/
        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
        {
            /* padding right luma */
            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);

            /* padding right chroma */
            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
        }
    }

    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
    {
        /* if number of mb's to be processed are less than 'N', go back.
         * exception to the above clause is end of row */
        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
        {
            return IH264E_SUCCESS;
        }
        else
        {
            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);

            /* performing deblocking for required number of MBs */
            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
            {
                u4_deblk_prev_row = 1;

                /* checking whether the top rows are deblocked */
                for (col = 0; col < i4_n_mb_process_count; col++)
                {
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
                }

                /* checking whether the top right MB is deblocked */
                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
                {
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
                }

                /* Top or Top right MBs not deblocked */
                /* (return without advancing the deblock / n_mb contexts) */
                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
                {
                    return IH264E_SUCCESS;
                }

                /* deblock the pending mbs one at a time, marking each in the map */
                for (row = 0; row < i4_n_mb_process_count; row++)
                {
                    ih264e_deblock_mb(ps_proc, ps_deblk);

                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                    /* the padding below addresses the mb row above the one */
                    /* held in the deblock context (pointer - one mb row)    */
                    if (ps_deblk->i4_mb_y > 0)
                    {
                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
                        {
                            /* padding left luma */
                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);

                            /* padding left chroma */
                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
                        }

                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
                        {
                            /* padding right luma */
                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                            /* padding right chroma */
                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
                        }
                    }
                    ps_deblk->i4_mb_x++;

                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;

                }
            }
            else if(i4_mb_y > 0)
            {
                /* deblocking disabled: only advance the deblock context */
                ps_deblk->i4_mb_x += i4_n_mb_process_count;

                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
            }

            /* Top padding of the picture is done while mb row 2 is processed. */
            /* NOTE(review): presumably row 0 is fully deblocked by then -     */
            /* confirm against the deblock lag                                 */
            if (i4_mb_y == 2)
            {
                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;

                if (ps_n_mb_ctxt->i4_mb_x == 0)
                {
                    /* first strip of the row also covers the left padding */
                    u4_pad_wd += PAD_LEFT;
                    u4_pad_top = -PAD_LEFT;
                }

                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
                {
                    /* last strip of the row also covers the right padding */
                    u4_pad_wd += PAD_RIGHT;
                }

                /* padding top luma */
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);

                /* padding top chroma */
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
            }

            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;

            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
            {
                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
                {
                    /* Bottom Padding is done in one stretch for the entire width */
                    if (ps_proc->u4_disable_deblock_level != 1)
                    {
                        /* re-point the deblock / n_mb contexts at the start */
                        /* of the last mb row                                */
                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;

                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;

                        ps_n_mb_ctxt->i4_mb_x = 0;
                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
                        ps_deblk->i4_mb_x = 0;
                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;

                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;

                        /* the row is walked as j full groups of i4_n_mbs  */
                        /* mbs followed by the remaining (wd % n_mbs) mbs  */
                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;

                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;

                        for (i = 0; i < j; i++)
                        {
                            for (col = 0; col < i4_n_mbs; col++)
                            {
                                ih264e_deblock_mb(ps_proc, ps_deblk);

                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                                ps_deblk->i4_mb_x++;
                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
                                ps_n_mb_ctxt->i4_mb_x++;
                            }
                        }

                        for (col = 0; col < i4_n_mb_process_count; col++)
                        {
                            ih264e_deblock_mb(ps_proc, ps_deblk);

                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                            ps_deblk->i4_mb_x++;
                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
                            ps_n_mb_ctxt->i4_mb_x++;
                        }

                        /* left padding of the last two mb rows, */
                        /* starting at row (i4_ht_mbs - 2)       */
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;

                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;

                        /* padding left luma */
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

                        /* padding left chroma */
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);

                        /* move down to the last mb row */
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;

                        /* padding left luma */
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

                        /* padding left chroma */
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);

                        /* right padding of the same two mb rows, starting */
                        /* just past the last column                       */
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;

                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;

                        /* padding right luma */
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                        /* padding right chroma */
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);

                        /* move down to the last mb row */
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;

                        /* padding right luma */
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                        /* padding right chroma */
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);

                    }

                    /* In case the height is at most 2 MBs, pad top here */
                    /* (the i4_mb_y == 2 path above is never taken)      */
                    if (ps_proc->i4_ht_mbs <= 2)
                    {
                        UWORD8 *pu1_pad_top_src;
                        /* padding top luma */
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);

                        /* padding top chroma */
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
                    }

                    /* padding bottom luma */
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);

                    /* padding bottom chroma */
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
                }
            }
        }
    }

    return IH264E_SUCCESS;
}

/**
*******************************************************************************
*
* @brief This function performs luma & chroma core coding for a set of mb's.
*
* @par Description:
*  The mb to be coded is taken and is evaluated over a predefined set of modes
*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
*  is selected and using intra/inter prediction filters, prediction is carried out.
*  The deviation between src and pred signal constitutes error signal. This error
*  signal is transformed (hierarchical transform if necessary) and quantized. The
*  quantized residue is packed in to entropy buffer for entropy coding. This is
*  repeated for all the mb's enlisted under the job.
*
* @param[in] ps_proc
*  Process context corresponding to the job
*
* @returns error status; the return values of ih264e_update_proc_ctxt() and
*  ih264_buf_mgr_release() are OR-ed together
*
* @remarks For mb rows other than row 0, the function busy-waits (yielding the
*  thread) on the proc map of the row above before evaluating intra modes.
*
*******************************************************************************
*/
WORD32 ih264e_process(process_ctxt_t *ps_proc)
{
    /* error status */
    WORD32 error_status = IH264_SUCCESS;

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* cbp luma, chroma */
    UWORD32 u4_cbp_l, u4_cbp_c;

    /* width in mbs */
    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;

    /* loop var */
    WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;

    /* valid modes (bit mask indexed by mb type) */
    UWORD32 u4_valid_modes = 0;

    /* gate threshold */
    WORD32 i4_gate_threshold = 0;

    /* is intra */
    WORD32 luma_idx, chroma_idx, is_intra;

    /* temp variables */
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;

    /*
     * list of modes for evaluation
     * -------------------------------------------------------------------------
     * Note on enabling I4x4 and I16x16
     * At very low QP's the hadamard transform in I16x16 will push up the maximum
     * coeff value very high. CAVLC may not be able to represent the value and
     * hence the stream may not be decodable in some clips.
     * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
     */
    if (ps_proc->i4_slice_type == ISLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;

            /* enable intra 8x8 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
        }

        /* enable intra 4x4 */
        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        /* I4x4 is forced on when frame qp <= 10 */
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

    }
    else if (ps_proc->i4_slice_type == PSLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
        }

        /* enable intra 4x4 */
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
        {
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        }
        /* I4x4 is forced on when frame qp <= 10 */
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

        /* enable inter P16x16 */
        u4_valid_modes |= (1 << P16x16);
    }
    else if (ps_proc->i4_slice_type == BSLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
        }

        /* enable intra 4x4 */
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
        {
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        }

        /* I4x4 is forced on when frame qp <= 10 */
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

        /* enable inter B16x16 */
        u4_valid_modes |= (1 << B16x16);
    }

    /* init entropy */
    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);

    /* compute recon when :
     *   1. current frame is to be used as a reference
     *   2. dump recon for bit stream sanity check
     */
    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
                                ps_codec->s_cfg.u4_enable_recon;

    /* Encode 'n' macroblocks,
     * 'n' being the number of mbs dictated by current proc ctxt */
    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
    {
        /* since we have not yet found sad, we have not yet got min sad */
        /* we need to initialize these variables for each MB */
        /* TODO how to get the min sad into the codec */
        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
        ps_proc->u4_min_sad_reached = 0;

        /* mb analysis */
        {
            /* temp var: raster index of the current mb */
            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;

            /* force intra refresh ? */
            /* inter stays enabled when AIR is off or this mb's refresh */
            /* map entry differs from the current AIR picture count     */
            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);

            /* evaluate inter 16x16 modes */
            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
            {
                /* compute nmb me */
                /* (run once per group of u4_nmb_me mbs, at the group's first mb) */
                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
                {
                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
                                                       i4_wd_mbs - ps_proc->i4_mb_x));
                }

                /* set pointers to ME data appropriately for other modules to use */
                {
                    /* index of the current mb inside the n-mb ME results */
                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;

                    /* get the min sad condition for current mb */
                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;

                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);

                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
                    /* u4_min_sad and u4_min_sad_reached are assigned a second */
                    /* time here, from the same source fields as above         */
                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;

                    /* get the best sub pel buffer */
                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
                }
                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
            }
            else
            {
                /* Derive neighbor availability for the current macroblock */
                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;

                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
            }

            /*
             * If air says intra, we need to force the following code path to evaluate intra
             * The easy way is just to say that the inter cost is too much
             */
            if (!i4_air_enable_inter)
            {
                ps_proc->u4_min_sad_reached = 0;
                ps_proc->i4_mb_cost = INT_MAX;
                ps_proc->i4_mb_distortion = INT_MAX;
            }
            else if (ps_proc->u4_mb_type == PSKIP)
            {
                /* ME chose skip: bypass intra evaluation and core coding */
                goto UPDATE_MB_INFO;
            }

            /* wait until the proc of [top + 1] mb is computed.
             * We wait till the proc dependencies are satisfied */
            if(ps_proc->i4_mb_y > 0)
            {
                /* proc map */
                UWORD8 *pu1_proc_map_top;

                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);

                /* poll the proc map entry of the row above through a      */
                /* volatile pointer, yielding the thread between checks    */
                while (1)
                {
                    volatile UWORD8 *pu1_buf;

                    /* entry checked: (loop index + 1), clamped to the last column */
                    WORD32 idx = i4_mb_idx + 1;

                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
                    pu1_buf =  pu1_proc_map_top + idx;
                    if(*pu1_buf)
                        break;
                    ithread_yield();
                }
            }

            /* If we already have the minimum sad, there is no point in searching for sad again */
            if (ps_proc->u4_min_sad_reached == 0)
            {
                /* intra gating in inter slices */
                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
                {
                    /* distortion of neighboring blocks */
                    /* (left, top, top right and top left) */
                    WORD32 i4_distortion[4];

                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;

                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;

                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;

                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;

                    /* threshold is the average of the four distortions */
                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;

                }

                /* If we are going to force intra we need to evaluate intra irrespective of gating */
                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
                {
                    /* evaluate intra 4x4 modes */
                    if (u4_valid_modes & (1 << I4x4))
                    {
                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
                        {
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
                        }
                        else
                        {
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
                        }
                    }

                    /* evaluate intra 16x16 modes */
                    if (u4_valid_modes & (1 << I16x16))
                    {
                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
                    }

                    /* evaluate intra 8x8 modes */
                    if (u4_valid_modes & (1 << I8x8))
                    {
                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
                    }

                }
            }
        }

        /* is intra */
        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
        {
            /* the intra mb type is used directly as the index into the */
            /* luma core coding function table                           */
            luma_idx = ps_proc->u4_mb_type;
            chroma_idx = 0;
            is_intra = 1;

            /* evaluate chroma blocks for intra */
            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
        }
        else
        {
            /* inter mb: entry 3 of the luma table, entry 1 of the chroma table */
            luma_idx = 3;
            chroma_idx = 1;
            is_intra = 0;
        }
        ps_proc->u4_is_intra = is_intra;
        ps_proc->ps_pu->b1_intra_flag = is_intra;

        /* redo MV pred of neighbors in the case intra mb */
        /* TODO : currently called unconditionally, needs to be called only in the case of intra
         * to modify neighbors */
        if (ps_proc->i4_slice_type != ISLICE)
        {
            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
        }

        /* Perform luma mb core coding */
        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);

        /* Perform chroma mb core coding */
        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);

        /* coded block pattern (chroma cbp above bit 4, luma cbp below) */
        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;

        /* for inter mbs, check whether the mb can be signalled as skip / direct */
        if (!ps_proc->u4_is_intra)
        {
            if (ps_proc->i4_slice_type == BSLICE)
            {
                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
                {
                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
                }
            }
            else if(!ps_proc->u4_cbp)
            {
                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
                {
                    ps_proc->u4_mb_type = PSKIP;
                }
            }
        }

UPDATE_MB_INFO:

        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);

        /**********************************************************************/
        /* if disable deblock level is '0' this implies enable deblocking for */
        /* all edges of all macroblocks with out any restrictions             */
        /*                                                                    */
        /* if disable deblock level is '1' this implies disable deblocking for*/
        /* all edges of all macroblocks with out any restrictions             */
        /*                                                                    */
        /* if disable deblock level is '2' this implies enable deblocking for */
        /* all edges of all macroblocks except edges overlapping with slice   */
        /* boundaries. This option is not currently supported by the encoder  */
        /* hence the slice map should be of no significance to perform debloc */
        /* king                                                               */
        /**********************************************************************/

        if (ps_proc->u4_compute_recon)
        {
            /* deblk context */
            /* src pointers */
            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;

            /* src indices */
            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
            UWORD32 i4_mb_y = ps_proc->i4_mb_y;

            /* compute blocking strength */
            if (ps_proc->u4_disable_deblock_level != 1)
            {
                ih264e_compute_bs(ps_proc);
            }

            /* nmb deblocking and hpel and padding */
            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
                                                  pu1_cur_pic_chroma, i4_mb_x,
                                                  i4_mb_y);
        }

        /* update the context after for coding next mb */
        error_status |= ih264e_update_proc_ctxt(ps_proc);

        /* Once the last row is processed, mark the buffer status appropriately */
        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
        {
            /* Pointer to current picture buffer structure */
            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;

            /* Pointer to current picture's mv buffer structure */
            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;

            /* Call ih264_buf_mgr_release() with BUF_MGR_CODEC on the current */
            /* picture's mv buffer and on the current picture buffer; the     */
            /* returned status is accumulated in error_status                 */
            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);

            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);

            /* when recon output is enabled, record the recon picture and its */
            /* frame level info in the codec's rec buffer entry for this      */
            /* context set                                                    */
            if (ps_codec->s_cfg.u4_enable_recon)
            {
                /* pic cnt */
                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;

                /* rec buffers */
                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;

                /* is last? */
                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;

                /* frame time stamp */
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
            }

        }
    }

    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);

    return error_status;
}

/**
*******************************************************************************
*
* @brief
* Function to update rc context after encoding
*
* @par Description
* This function updates the rate control context after the frame is encoded.
* Number of bits consumed by the current frame, frame distortion, frame cost,
* number of intra/inter mb's, ... are passed on to rate control context for
* updating the rc model.
* * @param[in] ps_codec * Handle to codec context * * @param[in] ctxt_sel * frame context selector * * @param[in] pic_cnt * pic count * * @returns i4_stuffing_byte * number of stuffing bytes (if necessary) * * @remarks * ******************************************************************************* */ WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) { /* proc set base idx */ WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0; /* proc ctxt */ process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; /* frame qp */ UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; /* cbr rc return status */ WORD32 i4_stuffing_byte = 0; /* current frame stats */ frame_info_t s_frame_info; picture_type_e rc_pic_type; /* temp var */ WORD32 i, j; /********************************************************************/ /* BEGIN INIT */ /********************************************************************/ /* init frame info */ irc_init_frame_info(&s_frame_info); /* get frame info */ for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) { /*****************************************************************/ /* One frame can be encoded by max of u4_num_cores threads */ /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ /* u4_num_cores threads */ /*****************************************************************/ for (j = 0; j< MAX_MB_TYPE; j++) { s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; } s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; /*****************************************************************/ /* gather number of residue and header bits consumed by the frame*/ /*****************************************************************/ 
ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); } /* get pic type */ switch (ps_codec->pic_type) { case PIC_I: case PIC_IDR: rc_pic_type = I_PIC; break; case PIC_P: rc_pic_type = P_PIC; break; case PIC_B: rc_pic_type = B_PIC; break; default: assert(0); break; } /* update rc lib with current frame stats */ i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, &(s_frame_info), ps_codec->s_rate_control.pps_pd_frm_rate, ps_codec->s_rate_control.pps_time_stamp, ps_codec->s_rate_control.pps_frame_time, (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), &rc_pic_type, i4_is_first_frm, &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], u1_frame_qp, &ps_codec->s_rate_control.num_intra_in_prev_frame, &ps_codec->s_rate_control.i4_avg_activity); return i4_stuffing_byte; } /** ******************************************************************************* * * @brief * entry point of a spawned encoder thread * * @par Description: * The encoder thread dequeues a proc/entropy job from the encoder queue and * calls necessary routines. 
*
 * @param[in] pv_proc
 *  Process context corresponding to the thread
 *
 * @returns
 *  status of the last job queue dequeue performed by the thread. The OR of the
 *  status of all executed jobs is stored in i4_error_code of the process
 *  context that was used last.
 *
 * @remarks
 *  Each iteration first tries the entropy queue (only when no entropy thread
 *  is flagged active for the current context set) and otherwise waits on the
 *  process queue. When the process queue dequeue fails, threads with a
 *  non-zero id leave the loop, while thread 0 switches the entropy dequeue to
 *  blocking mode and leaves once that dequeue fails.
 *
 *******************************************************************************
 */
WORD32 ih264e_process_thread(void *pv_proc)
{
    /* proc ctxt; re-pointed for every job to the context set of that job */
    process_ctxt_t *ps_proc = pv_proc;

    /* codec ctxt */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* status of the latest dequeue; returned to the caller */
    IH264_ERROR_T ret = IH264_SUCCESS;

    /* accumulated status of every job executed by this thread */
    WORD32 error_status = IH264_SUCCESS;

    /* job being serviced */
    job_t s_job;

    /* entropy dequeue is non-blocking till all the proc jobs are processed */
    WORD32 is_blocking = 0;

    /* set affinity */
    ithread_set_affinity(ps_proc->i4_id);

    for (;;)
    {
        /* set once an entropy job has been claimed in this iteration */
        WORD32 i4_got_entropy_job = 0;

        /* try to claim a job from the entropy queue */
        {
            int i4_lock_status = ithread_mutex_lock(ps_codec->pv_entropy_mutex);

            /* codec context selector */
            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;

            /* 'entropy thread active' flag of the selected context set */
            volatile UWORD32 *pu4_entropy_active =
                            &ps_codec->au4_entropy_thread_active[ctxt_sel];

            /* proceed only when the lock is actually held */
            if (0 == i4_lock_status)
            {
                if (0 == *pu4_entropy_active)
                {
                    /* no entropy thread is active, try dequeuing a job */
                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job,
                                             is_blocking);

                    if (IH264_SUCCESS == ret)
                    {
                        /* flag the entropy job as taken before unlocking */
                        *pu4_entropy_active = 1;
                        i4_got_entropy_job = 1;
                    }
                    else if (is_blocking)
                    {
                        /* blocking dequeue failed : nothing left to do */
                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
                        break;
                    }
                }

                ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
            }
        }

        /* no entropy job claimed : dequeue a job from the process queue */
        if (!i4_got_entropy_job)
        {
            ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);

            if (IH264_SUCCESS != ret)
            {
                /* threads other than thread 0 are done */
                if (ps_proc->i4_id)
                {
                    break;
                }

                /* thread 0 retries the entropy queue in blocking mode */
                is_blocking = 1;
                continue;
            }
        }

        /* choose appropriate proc context based on proc_base_idx */
        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];

        if (CMD_PROCESS == s_job.i4_cmd)
        {
            ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
            ps_proc->i4_mb_x = s_job.i2_mb_x;
            ps_proc->i4_mb_y = s_job.i2_mb_y;

            /* init process context */
            ih264e_init_proc_ctxt(ps_proc);

            /* core code all mbs enlisted under the current job */
            error_status |= ih264e_process(ps_proc);
        }
        else if (CMD_ENTROPY == s_job.i4_cmd)
        {
            ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
            ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
            ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;

            /* init entropy */
            ih264e_init_entropy_ctxt(ps_proc);

            /* entropy code all mbs enlisted under the current job */
            error_status |= ih264e_entropy(ps_proc);
        }
        else
        {
            /* unknown command */
            error_status |= IH264_FAIL;
        }
    }

    /* send error code */
    ps_proc->i4_error_code = error_status;

    return ret;
}