[libav-commits] vf_interlace: x86: improve asm performance

Michael Niedermayer git at libav.org
Tue Nov 25 03:01:55 CET 2014


Module: libav
Branch: master
Commit: ca5c3ff90972a5c97aabda2ace57ba72dcd7d83b

Author:    Michael Niedermayer <michaelni at gmx.at>
Committer: Vittorio Giovara <vittorio.giovara at gmail.com>
Date:      Sat Nov 15 04:07:08 2014 +0100

vf_interlace: x86: improve asm performance

4775 decicycles -> 3688 decicycles

---

 libavfilter/x86/vf_interlace.asm |   46 ++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 8c2e9b0..b8d8616 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -2,6 +2,7 @@
 ;* x86-optimized functions for interlace filter
 ;*
 ;* Copyright (C) 2014 Kieran Kunhya <kierank at obe.tv>
+;* Copyright (c) 2014 Michael Niedermayer <michaelni at gmx.at>
 ;*
 ;* This file is part of Libav.
 ;*
@@ -34,36 +35,27 @@ cglobal lowpass_line, 5, 5, 7
     add r4, r1
     neg r1
 
-    pxor m6, m6
+    pcmpeqb m6, m6
 
 .loop
-    mova m0, [r2+r1]
-    punpcklbw m1, m0, m6
-    punpckhbw m0, m6
-    paddw m0, m0
-    paddw m1, m1
+    mova m0, [r3+r1]
+    mova m1, [r3+r1+mmsize]
+    pavgb m0, [r4+r1]
+    pavgb m1, [r4+r1+mmsize]
+    mova m2, [r2+r1]
+    mova m3, [r2+r1+mmsize]
+    pxor m0, m6
+    pxor m1, m6
+    pxor m2, m6, [r2+r1]
+    pxor m3, m6, [r2+r1+mmsize]
+    pavgb m0, m2
+    pavgb m1, m3
+    pxor m0, m6
+    pxor m1, m6
+    mova [r0+r1], m0
+    mova [r0+r1+mmsize], m1
 
-    mova m2, [r3+r1]
-    punpcklbw m3, m2, m6
-    punpckhbw m2, m6
-
-    mova m4, [r4+r1]
-    punpcklbw m5, m4, m6
-    punpckhbw m4, m6
-
-    paddw m1, m3
-    pavgw m1, m5
-
-    paddw m0, m2
-    pavgw m0, m4
-
-    psrlw m0, 1
-    psrlw m1, 1
-
-    packuswb m1, m0
-    mova [r0+r1], m1
-
-    add r1, mmsize
+    add r1, 2*mmsize
     jl .loop
 REP_RET
 %endmacro



More information about the libav-commits mailing list