/* Libav: libspeexenc.c */
/*
 * Copyright (C) 2009 Justin Ruggles
 * Copyright (c) 2009 Xuggle Incorporated
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 
84 #include <speex/speex.h>
85 #include <speex/speex_header.h>
86 #include <speex/speex_stereo.h>
87 
89 #include "libavutil/common.h"
90 #include "libavutil/opt.h"
91 #include "avcodec.h"
92 #include "internal.h"
93 #include "audio_frame_queue.h"
94 
95 typedef struct {
96  AVClass *class;
97  SpeexBits bits;
98  SpeexHeader header;
99  void *enc_state;
101  float vbr_quality;
103  int abr;
104  int vad;
105  int dtx;
109 
112 {
113  const char *mode_str = "unknown";
114 
115  av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
116  switch (s->header.mode) {
117  case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
118  case SPEEX_MODEID_WB: mode_str = "wideband"; break;
119  case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
120  }
121  av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
122  if (s->header.vbr) {
123  av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
124  av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
125  } else if (s->abr) {
126  av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
127  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
128  } else {
129  av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
130  av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
131  }
132  av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
133  avctx->compression_level);
134  av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
135  avctx->frame_size);
136  av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
137  s->frames_per_packet);
138  av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
139  avctx->frame_size * s->frames_per_packet);
140  av_log(avctx, AV_LOG_DEBUG, "voice activity detection: %d\n", s->vad);
141  av_log(avctx, AV_LOG_DEBUG, "discontinuous transmission: %d\n", s->dtx);
142 }
143 
145 {
146  LibSpeexEncContext *s = avctx->priv_data;
147  const SpeexMode *mode;
148  uint8_t *header_data;
149  int header_size;
150  int32_t complexity;
151 
152  /* channels */
153  if (avctx->channels < 1 || avctx->channels > 2) {
154  av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
155  "mono are supported\n", avctx->channels);
156  return AVERROR(EINVAL);
157  }
158 
159  /* sample rate and encoding mode */
160  switch (avctx->sample_rate) {
161  case 8000: mode = &speex_nb_mode; break;
162  case 16000: mode = &speex_wb_mode; break;
163  case 32000: mode = &speex_uwb_mode; break;
164  default:
165  av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
166  "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
167  return AVERROR(EINVAL);
168  }
169 
170  /* initialize libspeex */
171  s->enc_state = speex_encoder_init(mode);
172  if (!s->enc_state) {
173  av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
174  return -1;
175  }
176  speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
177 
178  /* rate control method and parameters */
179  if (avctx->flags & CODEC_FLAG_QSCALE) {
180  /* VBR */
181  s->header.vbr = 1;
182  s->vad = 1; /* VAD is always implicitly activated for VBR */
183  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
184  s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
185  0.0f, 10.0f);
186  speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
187  } else {
188  s->header.bitrate = avctx->bit_rate;
189  if (avctx->bit_rate > 0) {
190  /* CBR or ABR by bitrate */
191  if (s->abr) {
192  speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
193  &s->header.bitrate);
194  speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
195  &s->header.bitrate);
196  } else {
197  speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
198  &s->header.bitrate);
199  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
200  &s->header.bitrate);
201  }
202  } else {
203  /* CBR by quality */
204  speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
205  &s->cbr_quality);
206  speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
207  &s->header.bitrate);
208  }
209  /* stereo side information adds about 800 bps to the base bitrate */
210  /* TODO: this should be calculated exactly */
211  avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
212  }
213 
214  /* VAD is activated with VBR or can be turned on by itself */
215  if (s->vad)
216  speex_encoder_ctl(s->enc_state, SPEEX_SET_VAD, &s->vad);
217 
218  /* Activiting Discontinuous Transmission */
219  if (s->dtx) {
220  speex_encoder_ctl(s->enc_state, SPEEX_SET_DTX, &s->dtx);
221  if (!(s->abr || s->vad || s->header.vbr))
222  av_log(avctx, AV_LOG_WARNING, "DTX is not much of use without ABR, VAD or VBR\n");
223  }
224 
225  /* set encoding complexity */
227  complexity = av_clip(avctx->compression_level, 0, 10);
228  speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
229  }
230  speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
231  avctx->compression_level = complexity;
232 
233  /* set packet size */
234  avctx->frame_size = s->header.frame_size;
235  s->header.frames_per_packet = s->frames_per_packet;
236 
237  /* set encoding delay */
238  speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
239  ff_af_queue_init(avctx, &s->afq);
240 
241  /* create header packet bytes from header struct */
242  /* note: libspeex allocates the memory for header_data, which is freed
243  below with speex_header_free() */
244  header_data = speex_header_to_packet(&s->header, &header_size);
245 
246  /* allocate extradata and coded_frame */
247  avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
248  if (!avctx->extradata) {
249  speex_header_free(header_data);
250  speex_encoder_destroy(s->enc_state);
251  av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
252  return AVERROR(ENOMEM);
253  }
254 
255  /* copy header packet to extradata */
256  memcpy(avctx->extradata, header_data, header_size);
257  avctx->extradata_size = header_size;
258  speex_header_free(header_data);
259 
260  /* init libspeex bitwriter */
261  speex_bits_init(&s->bits);
262 
263  print_enc_params(avctx, s);
264  return 0;
265 }
266 
267 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
268  const AVFrame *frame, int *got_packet_ptr)
269 {
270  LibSpeexEncContext *s = avctx->priv_data;
271  int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
272  int ret;
273 
274  if (samples) {
275  /* encode Speex frame */
276  if (avctx->channels == 2)
277  speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
278  speex_encode_int(s->enc_state, samples, &s->bits);
279  s->pkt_frame_count++;
280  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
281  return ret;
282  } else {
283  /* handle end-of-stream */
284  if (!s->pkt_frame_count)
285  return 0;
286  /* add extra terminator codes for unused frames in last packet */
287  while (s->pkt_frame_count < s->frames_per_packet) {
288  speex_bits_pack(&s->bits, 15, 5);
289  s->pkt_frame_count++;
290  }
291  }
292 
293  /* write output if all frames for the packet have been encoded */
294  if (s->pkt_frame_count == s->frames_per_packet) {
295  s->pkt_frame_count = 0;
296  if ((ret = ff_alloc_packet(avpkt, speex_bits_nbytes(&s->bits)))) {
297  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
298  return ret;
299  }
300  ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
301  speex_bits_reset(&s->bits);
302 
303  /* Get the next frame pts/duration */
305  &avpkt->pts, &avpkt->duration);
306 
307  avpkt->size = ret;
308  *got_packet_ptr = 1;
309  return 0;
310  }
311  return 0;
312 }
313 
315 {
316  LibSpeexEncContext *s = avctx->priv_data;
317 
318  speex_bits_destroy(&s->bits);
319  speex_encoder_destroy(s->enc_state);
320 
321  ff_af_queue_close(&s->afq);
322  av_freep(&avctx->extradata);
323 
324  return 0;
325 }
326 
327 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
328 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
329 static const AVOption options[] = {
330  { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
331  { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
332  { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
333  { "vad", "Voice Activity Detection", OFFSET(vad), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
334  { "dtx", "Discontinuous Transmission", OFFSET(dtx), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
335  { NULL },
336 };
337 
338 static const AVClass class = {
339  .class_name = "libspeex",
340  .item_name = av_default_item_name,
341  .option = options,
343 };
344 
345 static const AVCodecDefault defaults[] = {
346  { "b", "0" },
347  { "compression_level", "3" },
348  { NULL },
349 };
350 
352  .name = "libspeex",
353  .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
354  .type = AVMEDIA_TYPE_AUDIO,
355  .id = AV_CODEC_ID_SPEEX,
356  .priv_data_size = sizeof(LibSpeexEncContext),
357  .init = encode_init,
358  .encode2 = encode_frame,
359  .close = encode_close,
360  .capabilities = CODEC_CAP_DELAY,
361  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
363  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
365  0 },
366  .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
367  .priv_class = &class,
368  .defaults = defaults,
369 };