[libav-devel] [PATCH 4/8] hevcdsp: split the epel functions by width

Anton Khirnov anton at khirnov.net
Wed Aug 19 21:43:19 CEST 2015


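Give each block width its own EPEL function, with the width passed as a
compile-time constant instead of a runtime argument. This should allow for
more efficient SIMD.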
---
 libavcodec/hevc.c             | 29 ++++++++----------
 libavcodec/hevcdsp.c          | 18 ++++++++---
 libavcodec/hevcdsp.h          |  6 ++--
 libavcodec/hevcdsp_template.c | 71 +++++++++++++++++++++++++++++++++----------
 4 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 7070106..3dc510d 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -1533,7 +1533,7 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
  */
 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
-                      int x_off, int y_off, int block_w, int block_h)
+                      int x_off, int y_off, int block_w, int block_h, int pred_idx)
 {
     HEVCLocalContext *lc = &s->HEVClc;
     uint8_t *src1        = ref->data[1];
@@ -1571,8 +1571,8 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
 
         src1 = lc->edge_emu_buffer + buf_offset1;
         src1stride = edge_emu_stride;
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my, lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
+                                                       block_h, mx, my, lc->mc_buffer);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
                                  edge_emu_stride, src2stride,
@@ -1583,16 +1583,13 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
         src2 = lc->edge_emu_buffer + buf_offset2;
         src2stride = edge_emu_stride;
 
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
+                                                       block_h, mx, my, lc->mc_buffer);
     } else {
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
+                                                       block_h, mx, my, lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
+                                                       block_h, mx, my, lc->mc_buffer);
     }
 }
 
@@ -1737,7 +1734,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
         }
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
+                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
@@ -1774,7 +1771,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
+                  &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
@@ -1816,9 +1813,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
+                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
+                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 4e311a6..7cb273b 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -122,6 +122,12 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->put_hevc_qpel[1][0][i] = FUNC(put_hevc_qpel_v_      ## width, depth);  \
     hevcdsp->put_hevc_qpel[1][1][i] = FUNC(put_hevc_qpel_hv_     ## width, depth);  \
 
+#define EPEL_FUNC(i, width, depth)    /* register the pixels/h/v/hv EPEL variants for block width `width` at index i of put_hevc_epel */ \
+    hevcdsp->put_hevc_epel[0][0][i] = FUNC(put_hevc_epel_pixels_ ## width, depth);  \
+    hevcdsp->put_hevc_epel[0][1][i] = FUNC(put_hevc_epel_h_      ## width, depth);  \
+    hevcdsp->put_hevc_epel[1][0][i] = FUNC(put_hevc_epel_v_      ## width, depth);  \
+    hevcdsp->put_hevc_epel[1][1][i] = FUNC(put_hevc_epel_hv_     ## width, depth);  \
+
 #define HEVC_DSP(depth)                                                     \
     hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
     hevcdsp->transquant_bypass[0]   = FUNC(transquant_bypass4x4, depth);    \
@@ -154,10 +160,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     QPEL_FUNC(6, 48, depth);\
     QPEL_FUNC(7, 64, depth);\
                                                                             \
-    hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth);       \
-    hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth);            \
-    hevcdsp->put_hevc_epel[1][0] = FUNC(put_hevc_epel_v, depth);            \
-    hevcdsp->put_hevc_epel[1][1] = FUNC(put_hevc_epel_hv, depth);           \
+    EPEL_FUNC(0, 2,  depth);                                                \
+    EPEL_FUNC(1, 4,  depth);                                                \
+    EPEL_FUNC(2, 6, depth);                                                 \
+    EPEL_FUNC(3, 8, depth);                                                 \
+    EPEL_FUNC(4, 12, depth);                                                \
+    EPEL_FUNC(5, 16, depth);                                                \
+    EPEL_FUNC(6, 24, depth);                                                \
+    EPEL_FUNC(7, 32, depth);                                                \
                                                                             \
     hevcdsp->put_unweighted_pred   = FUNC(put_unweighted_pred, depth);      \
     hevcdsp->put_unweighted_pred_avg = FUNC(put_unweighted_pred_avg, depth);    \
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index c70b845..9004d17 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -61,9 +61,9 @@ typedef struct HEVCDSPContext {
     void (*put_hevc_qpel[2][2][8])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
                                 ptrdiff_t srcstride, int height,
                                 int mx, int my, int16_t *mcbuffer);
-    void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
-                                ptrdiff_t srcstride, int width, int height,
-                                int mx, int my, int16_t *mcbuffer);
+    void (*put_hevc_epel[2][2][8])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
+                                   ptrdiff_t srcstride, int height,
+                                   int mx, int my, int16_t *mcbuffer);
 
     void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
                                 ptrdiff_t srcstride, int width, int height);
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 807b53c..32c282a 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -963,10 +963,10 @@ QPEL(12)
 QPEL(8)
 QPEL(4)
 
-static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
-                                       uint8_t *_src, ptrdiff_t _srcstride,
-                                       int width, int height, int mx, int my,
-                                       int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
+                                              uint8_t *_src, ptrdiff_t _srcstride,
+                                              int width, int height, int mx, int my,
+                                              int16_t* mcbuffer)
 {
     int x, y;
     pixel *src          = (pixel *)_src;
@@ -986,10 +986,10 @@ static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
      filter_2 * src[x + stride] +               \
      filter_3 * src[x + 2 * stride])
 
-static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
-                                  uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
+                                         uint8_t *_src, ptrdiff_t _srcstride,
+                                         int width, int height, int mx, int my,
+                                         int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1007,10 +1007,10 @@ static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
-static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
-                                  uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
+                                         uint8_t *_src, ptrdiff_t _srcstride,
+                                         int width, int height, int mx, int my,
+                                         int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1029,10 +1029,10 @@ static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
-static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
-                                   uint8_t *_src, ptrdiff_t _srcstride,
-                                   int width, int height, int mx, int my,
-                                   int16_t* mcbuffer)
+static inline void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
+                                          uint8_t *_src, ptrdiff_t _srcstride,
+                                          int width, int height, int mx, int my,
+                                          int16_t* mcbuffer)
 {
     int x, y;
     pixel *src = (pixel *)_src;
@@ -1068,6 +1068,45 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
+#define EPEL(W) /* emit fixed-width wrappers put_hevc_epel_{pixels,h,v,hv}_W that pass W as the width argument of the generic functions */ \
+static void FUNC(put_hevc_epel_pixels_ ## W)(int16_t *dst, ptrdiff_t dststride,                           \
+                                             uint8_t *src, ptrdiff_t srcstride,                           \
+                                             int height, int mx, int my,                                  \
+                                             int16_t *mcbuffer)                                           \
+{                                                                                                         \
+    FUNC(put_hevc_epel_pixels)(dst, dststride, src, srcstride, W, height, mx, my, mcbuffer);              \
+}                                                                                                         \
+static void FUNC(put_hevc_epel_h_ ## W)(int16_t *dst, ptrdiff_t dststride,                                \
+                                             uint8_t *src, ptrdiff_t srcstride,                           \
+                                             int height, int mx, int my,                                  \
+                                             int16_t *mcbuffer)                                           \
+{                                                                                                         \
+    FUNC(put_hevc_epel_h)(dst, dststride, src, srcstride, W, height, mx, my, mcbuffer);                   \
+}                                                                                                         \
+static void FUNC(put_hevc_epel_v_ ## W)(int16_t *dst, ptrdiff_t dststride,                                \
+                                             uint8_t *src, ptrdiff_t srcstride,                           \
+                                             int height, int mx, int my,                                  \
+                                             int16_t *mcbuffer)                                           \
+{                                                                                                         \
+    FUNC(put_hevc_epel_v)(dst, dststride, src, srcstride, W, height, mx, my, mcbuffer);                   \
+}                                                                                                         \
+static void FUNC(put_hevc_epel_hv_ ## W)(int16_t *dst, ptrdiff_t dststride,                               \
+                                         uint8_t *src, ptrdiff_t srcstride,                               \
+                                         int height, int mx, int my,                                      \
+                                         int16_t *mcbuffer)                                               \
+{                                                                                                         \
+    FUNC(put_hevc_epel_hv)(dst, dststride, src, srcstride, W, height, mx, my, mcbuffer);                  \
+}
+
+EPEL(32) /* one wrapper set per width registered through EPEL_FUNC in hevcdsp.c */
+EPEL(24)
+EPEL(16)
+EPEL(12)
+EPEL(8)
+EPEL(6)
+EPEL(4)
+EPEL(2)
+
 static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
                                       int16_t *src, ptrdiff_t srcstride,
                                       int width, int height)
-- 
2.0.0



More information about the libav-devel mailing list