/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/* this code assumes that stride % 16 == 0 */
static void PREFIX_h264_chroma_mc8_altivec(uint8_t *dst, const uint8_t *src,
                                           ptrdiff_t stride, int h,
                                           int x, int y)
{
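    /* Bilinear interpolation weights for the 2x2 source neighbourhood:
     * A = (8-x)(8-y), B = x(8-y), C = (8-x)y, D = xy, with A+B+C+D = 64. */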
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    LOAD_ZERO;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
    const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
    const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
    const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
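    /* Each output pixel is (A*p0 + B*p1 + C*p2 + D*p3 + 32) >> 6:
     * v32ss is the rounding bias 32, v6us the final shift of 6. */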
    const vec_s16 v32ss = vec_sl(vec_splat_s16(1), vec_splat_u16(5));
    const vec_u16 v6us = vec_splat_u16(6);
    vec_u8 vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
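    /* Each row needs nine source bytes (eight pixels plus one extra for the
     * horizontal tap).  loadSecond is set when those bytes straddle two
     * 16-byte lines and a second aligned load is required; reallyBadAlign
     * marks the worst case, where src points at the last byte of a line. */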
    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);
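    /* fperm later merges the eight computed pixels with the half of the
     * 16-byte destination vector that must stay untouched; which half
     * depends on whether dst is 16-byte aligned. */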
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }
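    /* Prime the loop: load the first source row, with vsrc1uc shifted by one
     * pixel relative to vsrc0uc for the horizontal taps, and widen both to
     * signed 16-bit. */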
    GET_VSRC(vsrc0uc, vsrc1uc, 0, 16, vsrcperm0, vsrcperm1, src);

    vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc1uc);
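    /* Dispatch on the weights: D = x*y != 0 needs the full 2x2 kernel;
     * otherwise two of the four taps are zero and the simplified core with
     * the single combined tap vE = vB + vC (one of which is zero) is enough. */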
    if (ABCD[3]) {
        for (i = 0 ; i < h ; i++) {
            GET_VSRC(vsrc2uc, vsrc3uc, stride, 16, vsrcperm0, vsrcperm1, src);
            CHROMA_MC8_ALTIVEC_CORE(v32ss, noop);
        }
    } else {
        const vec_s16 vE = vec_add(vB, vC);
        if (ABCD[2]) { // x == 0 B == 0
            for (i = 0 ; i < h ; i++) {
                GET_VSRC1(vsrc1uc, stride, 15, vsrcperm0, src);
                CHROMA_MC8_ALTIVEC_CORE_SIMPLE;
                vsrc0uc = vsrc1uc;
            }
        } else { // y == 0 C == 0
            for (i = 0 ; i < h ; i++) {
                GET_VSRC(vsrc0uc, vsrc1uc, 0, 15, vsrcperm0, vsrcperm1, src);
                CHROMA_MC8_ALTIVEC_CORE_SIMPLE;
            }
        }
    }
}

/* this code assumes that stride % 16 == 0 */
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *src,
                                                 ptrdiff_t stride, int h,
                                                 int x, int y)
{
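    /* Same bilinear chroma interpolation as PREFIX_h264_chroma_mc8_altivec
     * above, but for VC-1's no-rounding mode: the bias added before the
     * >> 6 is 28 instead of 32, and the full four-tap loop is always used. */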
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    LOAD_ZERO;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
    const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
    const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
    const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
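    /* v28ss = 32 - 4 = 28: the no-rounding bias, added after the
     * multiply-accumulates via the add28 operand of the core macro. */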
    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1), vec_splat_u16(5)), vec_splat_s16(4));
    const vec_u16 v6us = vec_splat_u16(6);
    vec_u8 vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;

    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }

    GET_VSRC(vsrc0uc, vsrc1uc, 0, 16, vsrcperm0, vsrcperm1, src);

    vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc1uc);

    for (i = 0 ; i < h ; i++) {
        GET_VSRC(vsrc2uc, vsrc3uc, stride, 16, vsrcperm0, vsrcperm1, src);
        CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28);
    }
}