/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) &&
                   (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

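/* A frame consists of the first (header) partition, already being read
 * through s->c, followed by 1, 2, 4 or 8 DCT coefficient partitions. For n
 * coefficient partitions, the sizes of the first n-1 are stored in front of
 * the data as 3-byte little-endian values; the last partition simply takes
 * whatever remains of the buffer. */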
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

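/* Dequantization factors, 9.6/14.1: a per-segment base quantizer index,
 * optionally offset by per-plane deltas, is mapped through the DC/AC lookup
 * tables. The Y2 (WHT) factors are additionally scaled, DC by 2 and AC by
 * 155/100; the latter is done as (101581 * q) >> 16, so e.g. an AC
 * quantizer of 20 becomes 31. */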
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after
 * this frame. The spec isn't clear here, so this follows libvpx's behaviour:
 *
 * - If the update flag for the buffer is set, it is updated with the
 *   current frame.
 * - Otherwise a 2-bit copy flag selects the source: 0 keeps the buffer as
 *   is, 1 copies from the last frame, 2 copies from the other one of
 *   golden/altref.
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

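    /* 3-byte frame tag, 9.1: bit 0 is the inverted keyframe flag, bits 1-3
     * the profile, bit 4 the show-frame flag, and bits 5-23 the size of the
     * first (header) partition. */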
    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

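    /* Keyframes carry a further 7-byte block, 9.1: the start code bytes
     * 0x9d 0x01 0x2a (0x2a019d when read as a little-endian 24-bit value),
     * then 14-bit width and height, each with a 2-bit upscale factor. */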
    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c,  vp8_pred8x8c_prob_inter,  sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc,       vp8_mv_default_prob,      sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

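    /* Large magnitudes: bits 0-2 are coded first, then bits 9 down to 4;
     * bit 3 is forced to 1 when bits 4-9 are all zero (values below 8 use
     * the small-MV tree below instead), otherwise it is coded explicitly. */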
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                \
    {                                                                   \
        VP8Macroblock *edge = mb_edge[n];                               \
        int edge_ref = edge->ref_frame;                                 \
        if (edge_ref != VP56_FRAME_CURRENT) {                           \
            uint32_t mv = AV_RN32A(&edge->mv);                          \
            if (mv) {                                                   \
                if (cur_sign_bias != sign_bias[edge_ref]) {             \
                    /* SWAR negate of the values in mv. */              \
                    mv = ~mv;                                           \
                    mv = ((mv & 0x7fff7fff) + 0x00010001) ^ (mv & 0x80008000); \
                }                                                       \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                \
                    AV_WN32A(&near_mv[++idx], mv);                      \
                cnt[idx] += 1 + (n != 2);                               \
            } else                                                      \
                cnt[CNT_ZERO] += 1 + (n != 2);                          \
        }                                                               \
    }

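    /* The SWAR negation in MV_EDGE_CHECK flips both 16-bit MV components of
     * the packed word at once: each lane is complemented and incremented
     * without letting the +1 carry cross the lane boundary. For example
     * (x=1, y=-2), packed as 0xFFFE0001, becomes 0x0002FFFF, i.e. (x=-1, y=2). */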
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        AV_ZERO32(&mb->bmv[0]);
    }
}

#ifndef decode_block_coeffs_internal
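/* DCT coefficient tokens, 13.2: each node of the token tree below is one
 * binary arithmetic decision. Values 0-4 are coded directly; DCT_CAT1 to
 * DCT_CAT6 cover the ranges 5-6, 7-10, 11-18, 19-34, 35-66 and 67 upwards,
 * with the remaining bits read using fixed per-category probabilities. */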
/**
 * @param r arithmetic decoder context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probabilities for the first token to be read
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded,
 *         otherwise the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif

/**
 * @param c arithmetic decoder context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for the number of
 *                   surrounding all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded,
 *         otherwise the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

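/* Intra prediction must see the row above as it looked before the loop
 * filter ran over it, so the saved pre-filter top border is swapped in
 * around prediction (xchg=1) and swapped back out afterwards (xchg=0). */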
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg) do {      \
        if (xchg) AV_SWAP64(b, a); \
        else      AV_COPY64(b, a); \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    } else {
        return mb_y ? mode : HOR_PRED8x8;
    }
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = 127U;
                        AV_WN32A(copy_dst + 4, 127U * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = 129U;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = 129U;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

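/* Luma MVs are in quarter-pel units and chroma MVs in eighth-pel, so the
 * tables above are indexed by the eighth-pel phase: (mv->x << 1) & 7 for
 * luma (which therefore only uses the even entries) and mv->x & 7 for
 * chroma. */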
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param td thread data
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;

        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param td thread data
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref, &uvmv,
                              4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

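/* Inverse transform, 14.3-14.5: non_zero_count_cache stores one count byte
 * per 4x4 block; reading four of them as a 32-bit word lets
 * nnz4 & ~0x01010101 test in one operation whether any block in the row
 * has more than a DC coefficient, selecting between the full per-block
 * IDCT and the cheaper DC-only add for the whole row. */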
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, td->block[y][x], s->linesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, td->block[4 + ch][(y << 1) + x], s->uvlinesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, td->block[4 + ch][(y << 1) + x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}

static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit  = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
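    /* High-edge-variance threshold, 15.2: indexed as [keyframe][filter_level].
     * Inter frames (row 0) tolerate a threshold of up to 3, keyframes
     * (row 1) only up to 2, before the filter falls back to its weaker
     * variant around an edge. */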

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

#define MARGIN (16 << 2)
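/* MVs may point at most MARGIN quarter-pels (16 pixels) outside the frame;
 * mv_min/mv_max express that limit relative to the current macroblock and
 * slide by 64 quarter-pels per macroblock row/column below. */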
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, DC_PRED * 0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

#if HAVE_THREADS
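/* Sliced-threading synchronization: each thread publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos and waits, via the other thread's
 * condition variable, until that thread has passed the macroblock position
 * the current one depends on. */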
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)               \
    do {                                                                \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);           \
        if (otd->thread_mb_pos < tmp) {                                 \
            pthread_mutex_lock(&otd->lock);                             \
            td->wait_mb_pos = tmp;                                      \
            do {                                                        \
                if (otd->thread_mb_pos >= tmp)                          \
                    break;                                              \
                pthread_cond_wait(&otd->cond, &otd->lock);              \
            } while (1);                                                \
            td->wait_mb_pos = INT_MAX;                                  \
            pthread_mutex_unlock(&otd->lock);                           \
        }                                                               \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                      \
    do {                                                                \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);          \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                          \
        int is_null          = (next_td == NULL) || (prev_td == NULL);  \
        int pos_check        = (is_null) ? 1 :                          \
            (next_td != td && pos >= next_td->wait_mb_pos) ||           \
            (prev_td != td && pos >= prev_td->wait_mb_pos);             \
        td->thread_mb_pos = pos;                                        \
        if (sliced_threading && pos_check) {                            \
            pthread_mutex_lock(&td->lock);                              \
            pthread_cond_broadcast(&td->cond);                          \
            pthread_mutex_unlock(&td->lock);                            \
        }                                                               \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif

static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };
    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x + 1, mb_y - 1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width + 3) + (mb_x + 1), mb_y - 1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        }
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
            }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height)
            break;
        td->thread_mb_pos = mb_y << 16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height * 2 - 1, 0, (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};