[libav-devel] [PATCH 1/2] checkasm: Check register clobbering on arm

Janne Grunau janne-libav at jannau.net
Thu Dec 31 15:11:10 CET 2015


On 2015-12-31 12:46:39 +0200, Martin Storsjö wrote:
> Use two separate functions, depending on whether neon is available.

should be neon or vfp

> This is set to require armv5te - it uses blx, which is only available
> since armv5t, but we don't have a separate configure item for that.
> (It also uses ldrd, which requires armv5te, but this could be avoided
> if necessary.)

This breaks running checkasm on armv4 with runtime cpu detection. It's 
not very useful since it only checks if two consecutive calls of the c 
variant return equal/similar results.

> ---
> Compared to x264's checkasm, this is slightly simpler since it only
> is expected to return void, i.e. no issues with handling of 64 bit return
> values vs function pointer type casting. (The assembly wrapper still tries
> to maintain the return value intact though, even though it's currently
> not used.) It also is simplified since it doesn't use separate parameter
> for indicating failures (avoiding one extra register to backup, and
> reducing the number of parameters to shift by from 2 to 1).

I think that is actually a disadvantage because it breaks type which are 
naturally aligned to 8-byte. Those are assigned register pairs starting 
with an even register or 8-byte aligned stack address. If we shift only 
one register, that rule is broken so we need a second dummy register.
 
> Other than
> that, there's only been some minor touch-ups, using more of libav's
> asm.S and using #ifdef __APPLE__ instead of #if SYS_DARWIN.
> 
> If all calls would include the cpuflags like declare_new_emms,
> one wouldn't need the neon vs noneon function selection at startup.

declare_new_emms doesn't include cpuflags, it specifies for which cpu 
flags a emms is required. the actual (masked) cpuflags are queried by 
av_get_cpu_flags(). That would work two but the startup check is more 
correct since the neon/vfp checked_call variant should be always used if 
the cpu supports it.

> ---
>  tests/checkasm/arm/Makefile   |    1 +
>  tests/checkasm/arm/checkasm.S |  132 +++++++++++++++++++++++++++++++++++++++++
>  tests/checkasm/checkasm.c     |    9 +++
>  tests/checkasm/checkasm.h     |    8 +++
>  4 files changed, 150 insertions(+)
>  create mode 100644 tests/checkasm/arm/Makefile
>  create mode 100644 tests/checkasm/arm/checkasm.S
> 
> diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
> new file mode 100644
> index 0000000..f73da18
> --- /dev/null
> +++ b/tests/checkasm/arm/Makefile
> @@ -0,0 +1 @@
> +CHECKASMOBJS-$(HAVE_ARMV5TE) += arm/checkasm.o
> diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
> new file mode 100644
> index 0000000..d1740bc
> --- /dev/null
> +++ b/tests/checkasm/arm/checkasm.S
> @@ -0,0 +1,132 @@
> +/****************************************************************************
> + * Assembly testing and benchmarking tool
> + * Copyright (c) 2015 Martin Storsjo
> + * Copyright (c) 2015 Janne Grunau
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
> + *****************************************************************************/
> +
> +#include "libavutil/arm/asm.S"
> +
> +const register_init
> +    .quad 0x21f86d66c8ca00ce
> +    .quad 0x75b6ba21077c48ad
> +    .quad 0xed56bb2dcb3c7736
> +    .quad 0x8bda43d3fd1a7e06
> +    .quad 0xb64a9c9e5d318408
> +    .quad 0xdf9a54b303f1d3a3
> +    .quad 0x4a75479abd64e097
> +    .quad 0x249214109d5d1c88
> +endconst
> +
> +const error_message
> +    .asciz "failed to preserve register"
> +endconst
> +
> +@ max number of args used by any asm function.
> +#define MAX_ARGS 15
> +
> +#define ARG_STACK 4*(MAX_ARGS - 3)
> +
> +.macro clobbercheck variant
> +.equ pushed, 4*9
> +function checkasm_checked_call_\variant, export=1
> +    push        {r4-r11, lr}
> +.ifc \variant, neon
> +    vpush       {q4-q7}
> +.equ pushed, pushed + 16*4
> +.endif
> +
> +    movrel      r12, register_init
> +.ifc \variant, neon
> +    vldm        r12, {q4-q7}
> +.endif
> +    ldm         r12, {r4-r11}
> +
> +    sub         sp,  sp,  #ARG_STACK
> +.equ pos, 0
> +.rept MAX_ARGS-3
> +    ldr         r12, [sp, #ARG_STACK + pushed + 4 + pos]
> +    str         r12, [sp, #pos]
> +.equ pos, pos + 4
> +.endr
> +
> +    mov         r12, r0
> +    mov         r0,  r1
> +    mov         r1,  r2
> +    mov         r2,  r3
> +    ldr         r3,  [sp, #ARG_STACK + pushed]
> +    blx         r12
> +    add         sp,  sp,  #ARG_STACK
> +
> +    push        {r0, r1}
> +    movrel      r12, register_init
> +.ifc \variant, neon
> +    vldm        r12, {q0-q3}
> +    veor        q0,  q0,  q4
> +    veor        q1,  q1,  q5
> +    veor        q2,  q2,  q6
> +    veor        q3,  q3,  q7
> +    vorr        q0,  q0,  q1
> +    vorr        q0,  q0,  q2
> +    vorr        q0,  q0,  q3
> +    vorr        d0,  d0,  d1
> +    vrev64.32   d1,  d0
> +    vorr        d0,  d0,  d1
> +    vmov.32     r3,  d0[0]
> +.else
> +    mov         r3,  #0
> +.endif
> +
> +.macro check_reg reg1, reg2=
> +    ldrd        r0,  r1,  [r12], #8
> +    eor         r0,  r0, \reg1
> +    orr         r3,  r3, r0
> +.ifnb \reg2
> +    eor         r1,  r1, \reg2
> +    orr         r3,  r3, r1
> +.endif
> +.endm
> +    check_reg   r4,  r5
> +    check_reg   r6,  r7
> +@ r9 is a volatile register in the ios ABI
> +#ifdef __APPLE__
> +    check_reg   r8
> +#else
> +    check_reg   r8,  r9
> +#endif
> +    check_reg   r10, r11
> +.purgem check_reg
> +
> +    cmp         r3,  #0
> +    beq         0f
> +
> +    movrel      r0, error_message
> +    blx         X(checkasm_fail_func)
> +0:
> +    pop         {r0, r1}
> +.ifc \variant, neon
> +    vpop        {q4-q7}
> +.endif
> +    pop         {r4-r11, pc}
> +endfunc
> +.endm
> +
> +#if HAVE_NEON
> +clobbercheck neon
> +#endif
> +clobbercheck noneon

except for the alignment issues for types with 8-byte alignment ok

> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index d6f8ffc..2c0bbbe 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -53,6 +53,10 @@
>  #define isatty(fd) 1
>  #endif
>  
> +#if HAVE_ARMV5TE
> +void (*checkasm_checked_call)(void *func, ...) = checkasm_checked_call_noneon;
> +#endif
> +
>  /* List of tests to invoke */
>  static const struct {
>      const char *name;
> @@ -463,6 +467,11 @@ int main(int argc, char *argv[])
>  {
>      int i, seed, ret = 0;
>  
> +#if HAVE_ARMV5TE && HAVE_NEON
> +    if (av_get_cpu_flags() & AV_CPU_FLAG_NEON)
> +        checkasm_checked_call = checkasm_checked_call_neon;
> +#endif

I think it would be cleaner to have this in a single ARCH_ARM block here

the easiest solution to the armv4 + runtime cpu detection would be to 
return here without running any tests.

Janne


More information about the libav-devel mailing list