[libav-devel] [PATCH 1/2] checkasm: Check register clobbering on arm

Martin Storsjö martin at martin.st
Thu Dec 31 16:00:40 CET 2015


On Thu, 31 Dec 2015, Janne Grunau wrote:

> On 2015-12-31 12:46:39 +0200, Martin Storsjö wrote:
>> Use two separate functions, depending on whether neon is available.
>
> should be neon or vfp

Ah, yes, so if either NEON or VFPv3 is available (or just check for 
VFPv3?).

>> This is set to require armv5te - it uses blx, which is only available
>> since armv5t, but we don't have a separate configure item for that.
>> (It also uses ldrd, which requires armv5te, but this could be avoided
>> if necessary.)
>
> This breaks running checkasm on armv4 with runtime cpu detection. It's
> not very useful since it only checks if two consecutive calls of the c
> variant return equal/similar results.

Hmm, indeed. Shouldn't this be fixed by using HAVE_ARMV5TE_EXTERNAL 
instead?

>> ---
>> Compared to x264's checkasm, this is slightly simpler since it only
>> is expected to return void, i.e. no issues with handling of 64 bit return
>> values vs function pointer type casting. (The assembly wrapper still tries
>> to maintain the return value intact though, even though it's currently
>> not used.) It also is simplified since it doesn't use separate parameter
>> for indicating failures (avoiding one extra register to backup, and
>> reducing the number of parameters to shift by from 2 to 1).
>
> I think that is actually a disadvantage because it breaks type which are
> naturally aligned to 8-byte. Those are assigned register pairs starting
> with an even register or 8-byte aligned stack address. If we shift only
> one register, that rule is broken so we need a second dummy register.

Hmm, ok, will try to fix.

>> Other than
>> that, there's only been some minor touch-ups, using more of libav's
>> asm.S and using #ifdef __APPLE__ instead of #if SYS_DARWIN.
>>
>> If all calls would include the cpuflags like declare_new_emms,
>> one wouldn't need the neon vs noneon function selection at startup.
>
> declare_new_emms doesn't include cpuflags, it specifies for which cpu
> flags a emms is required. the actual (masked) cpuflags are queried by
> av_get_cpu_flags(). That would work two but the startup check is more
> correct since the neon/vfp checked_call variant should be always used if
> the cpu supports it.

Ah, thanks for clarifying it.

>> ---
>>  tests/checkasm/arm/Makefile   |    1 +
>>  tests/checkasm/arm/checkasm.S |  132 +++++++++++++++++++++++++++++++++++++++++
>>  tests/checkasm/checkasm.c     |    9 +++
>>  tests/checkasm/checkasm.h     |    8 +++
>>  4 files changed, 150 insertions(+)
>>  create mode 100644 tests/checkasm/arm/Makefile
>>  create mode 100644 tests/checkasm/arm/checkasm.S
>>
>> diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
>> new file mode 100644
>> index 0000000..f73da18
>> --- /dev/null
>> +++ b/tests/checkasm/arm/Makefile
>> @@ -0,0 +1 @@
>> +CHECKASMOBJS-$(HAVE_ARMV5TE) += arm/checkasm.o
>> diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
>> new file mode 100644
>> index 0000000..d1740bc
>> --- /dev/null
>> +++ b/tests/checkasm/arm/checkasm.S
>> @@ -0,0 +1,132 @@
>> +/****************************************************************************
>> + * Assembly testing and benchmarking tool
>> + * Copyright (c) 2015 Martin Storsjo
>> + * Copyright (c) 2015 Janne Grunau
>> + *
>> + * This file is part of Libav.
>> + *
>> + * Libav is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * Libav is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
>> + *****************************************************************************/
>> +
>> +#include "libavutil/arm/asm.S"
>> +
>> +const register_init
>> +    .quad 0x21f86d66c8ca00ce
>> +    .quad 0x75b6ba21077c48ad
>> +    .quad 0xed56bb2dcb3c7736
>> +    .quad 0x8bda43d3fd1a7e06
>> +    .quad 0xb64a9c9e5d318408
>> +    .quad 0xdf9a54b303f1d3a3
>> +    .quad 0x4a75479abd64e097
>> +    .quad 0x249214109d5d1c88
>> +endconst
>> +
>> +const error_message
>> +    .asciz "failed to preserve register"
>> +endconst
>> +
>> +@ max number of args used by any asm function.
>> +#define MAX_ARGS 15
>> +
>> +#define ARG_STACK 4*(MAX_ARGS - 3)
>> +
>> +.macro clobbercheck variant
>> +.equ pushed, 4*9
>> +function checkasm_checked_call_\variant, export=1
>> +    push        {r4-r11, lr}
>> +.ifc \variant, neon
>> +    vpush       {q4-q7}
>> +.equ pushed, pushed + 16*4
>> +.endif
>> +
>> +    movrel      r12, register_init
>> +.ifc \variant, neon
>> +    vldm        r12, {q4-q7}
>> +.endif
>> +    ldm         r12, {r4-r11}
>> +
>> +    sub         sp,  sp,  #ARG_STACK
>> +.equ pos, 0
>> +.rept MAX_ARGS-3
>> +    ldr         r12, [sp, #ARG_STACK + pushed + 4 + pos]
>> +    str         r12, [sp, #pos]
>> +.equ pos, pos + 4
>> +.endr
>> +
>> +    mov         r12, r0
>> +    mov         r0,  r1
>> +    mov         r1,  r2
>> +    mov         r2,  r3
>> +    ldr         r3,  [sp, #ARG_STACK + pushed]
>> +    blx         r12
>> +    add         sp,  sp,  #ARG_STACK
>> +
>> +    push        {r0, r1}
>> +    movrel      r12, register_init
>> +.ifc \variant, neon
>> +    vldm        r12, {q0-q3}
>> +    veor        q0,  q0,  q4
>> +    veor        q1,  q1,  q5
>> +    veor        q2,  q2,  q6
>> +    veor        q3,  q3,  q7
>> +    vorr        q0,  q0,  q1
>> +    vorr        q0,  q0,  q2
>> +    vorr        q0,  q0,  q3
>> +    vorr        d0,  d0,  d1
>> +    vrev64.32   d1,  d0
>> +    vorr        d0,  d0,  d1
>> +    vmov.32     r3,  d0[0]
>> +.else
>> +    mov         r3,  #0
>> +.endif
>> +
>> +.macro check_reg reg1, reg2=
>> +    ldrd        r0,  r1,  [r12], #8
>> +    eor         r0,  r0, \reg1
>> +    orr         r3,  r3, r0
>> +.ifnb \reg2
>> +    eor         r1,  r1, \reg2
>> +    orr         r3,  r3, r1
>> +.endif
>> +.endm
>> +    check_reg   r4,  r5
>> +    check_reg   r6,  r7
>> +@ r9 is a volatile register in the ios ABI
>> +#ifdef __APPLE__
>> +    check_reg   r8
>> +#else
>> +    check_reg   r8,  r9
>> +#endif
>> +    check_reg   r10, r11
>> +.purgem check_reg
>> +
>> +    cmp         r3,  #0
>> +    beq         0f
>> +
>> +    movrel      r0, error_message
>> +    blx         X(checkasm_fail_func)
>> +0:
>> +    pop         {r0, r1}
>> +.ifc \variant, neon
>> +    vpop        {q4-q7}
>> +.endif
>> +    pop         {r4-r11, pc}
>> +endfunc
>> +.endm
>> +
>> +#if HAVE_NEON
>> +clobbercheck neon
>> +#endif
>> +clobbercheck noneon
>
> except for the alignment issues for types with 8-byte alignment ok
>
>> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
>> index d6f8ffc..2c0bbbe 100644
>> --- a/tests/checkasm/checkasm.c
>> +++ b/tests/checkasm/checkasm.c
>> @@ -53,6 +53,10 @@
>>  #define isatty(fd) 1
>>  #endif
>>
>> +#if HAVE_ARMV5TE
>> +void (*checkasm_checked_call)(void *func, ...) = checkasm_checked_call_noneon;
>> +#endif
>> +
>>  /* List of tests to invoke */
>>  static const struct {
>>      const char *name;
>> @@ -463,6 +467,11 @@ int main(int argc, char *argv[])
>>  {
>>      int i, seed, ret = 0;
>>
>> +#if HAVE_ARMV5TE && HAVE_NEON
>> +    if (av_get_cpu_flags() & AV_CPU_FLAG_NEON)
>> +        checkasm_checked_call = checkasm_checked_call_neon;
>> +#endif
>
> I think it would be cleaner to have this in a single ARCH_ARM block here

Hmm, can you elaborate a little? We need to have at least an ifdef check 
for HAVE_NEON (or rather VFPV3) to avoid undefined references - is there 
some existing ARCH_ARM block you suggest I should merge it into?

// Martin


More information about the libav-devel mailing list