Thu Apr 28 2011 17:15:21

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * \note This work was motivated by Jeremy McNamara;
00025  * hacked to be configurable by anthm and bkw 9/28/2004
00026  *
00027  * \ingroup codecs
00028  *
00029  * \extref The Speex library - http://www.speex.org
00030  *
00031  */
00032 
00033 /*** MODULEINFO
00034    <depend>speex</depend>
00035    <depend>speex_preprocess</depend>
00036    <use>speexdsp</use>
00037  ***/
00038 
00039 #include "asterisk.h"
00040 
00041 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 211580 $")
00042 
00043 #include <speex/speex.h>
00044 
00045 /* We require a post 1.1.8 version of Speex to enable preprocessing
00046    and better type handling */   
00047 #ifdef _SPEEX_TYPES_H
00048 #include <speex/speex_preprocess.h>
00049 #endif
00050 
00051 #include "asterisk/translate.h"
00052 #include "asterisk/module.h"
00053 #include "asterisk/config.h"
00054 #include "asterisk/utils.h"
00055 
/*
 * Module-wide codec settings.  The values below are the built-in defaults;
 * parse_config() replaces them with whatever the [speex] section of
 * codecs.conf provides.
 */

/* Encoder / decoder controls */
static int quality = 3;        /* SPEEX_SET_QUALITY, applied only when neither ABR nor VBR is on */
static int complexity = 2;     /* SPEEX_SET_COMPLEXITY */
static int enhancement = 0;    /* decoder side: SPEEX_SET_ENH when non-zero */
static int vad = 0;            /* SPEEX_SET_VAD, applied only when neither ABR nor VBR is on */
static int vbr = 0;            /* SPEEX_SET_VBR when non-zero */
static float vbr_quality = 4;  /* SPEEX_SET_VBR_QUALITY, sent together with vbr */
static int abr = 0;            /* SPEEX_SET_ABR target bitrate; 0 leaves ABR off */
static int dtx = 0;            /* set to 1 to enable silence detection (SPEEX_SET_DTX) */

/* Preprocessor controls, only consulted when built with _SPEEX_TYPES_H */
static int preproc = 0;                 /* create a preprocessor for new encoders */
static int pp_vad = 0;                  /* SPEEX_PREPROCESS_SET_VAD */
static int pp_agc = 0;                  /* SPEEX_PREPROCESS_SET_AGC */
static float pp_agc_level = 8000;       /* SPEEX_PREPROCESS_SET_AGC_LEVEL; XXX origin of 8000 is undocumented here */
static int pp_denoise = 0;              /* SPEEX_PREPROCESS_SET_DENOISE */
static int pp_dereverb = 0;             /* SPEEX_PREPROCESS_SET_DEREVERB */
static float pp_dereverb_decay = 0.4;   /* SPEEX_PREPROCESS_SET_DEREVERB_DECAY */
static float pp_dereverb_level = 0.3;   /* SPEEX_PREPROCESS_SET_DEREVERB_LEVEL */
00074 
/* Frame type codes; TYPE_MASK (0x3) covers every bit used by the three codes. */
#define TYPE_HIGH     0x0
#define TYPE_LOW      0x1
#define TYPE_SILENCE  0x2
#define TYPE_MASK     0x3

/* Capacity, in 16-bit samples, of the per-translator work/output buffers. */
#define BUFFER_SAMPLES  8000
#define SPEEX_SAMPLES   160
00082 
00083 /* Sample frame data */
00084 #include "asterisk/slin.h"
00085 #include "ex_speex.h"
00086 
00087 struct speex_coder_pvt {
00088    void *speex;
00089    SpeexBits bits;
00090    int framesize;
00091    int silent_state;
00092 #ifdef _SPEEX_TYPES_H
00093    SpeexPreprocessState *pp;
00094    spx_int16_t buf[BUFFER_SAMPLES];
00095 #else
00096    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00097 #endif
00098 };
00099 
00100 
00101 static int lintospeex_new(struct ast_trans_pvt *pvt)
00102 {
00103    struct speex_coder_pvt *tmp = pvt->pvt;
00104 
00105    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00106       return -1;
00107 
00108    speex_bits_init(&tmp->bits);
00109    speex_bits_reset(&tmp->bits);
00110    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00111    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00112 #ifdef _SPEEX_TYPES_H
00113    if (preproc) {
00114       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00115       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00116       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00117       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00118       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00119       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00122    }
00123 #endif
00124    if (!abr && !vbr) {
00125       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00126       if (vad)
00127          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00128    }
00129    if (vbr) {
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00131       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00132    }
00133    if (abr)
00134       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00135    if (dtx)
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00137    tmp->silent_state = 0;
00138 
00139    return 0;
00140 }
00141 
00142 static int speextolin_new(struct ast_trans_pvt *pvt)
00143 {
00144    struct speex_coder_pvt *tmp = pvt->pvt;
00145    
00146    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00147       return -1;
00148 
00149    speex_bits_init(&tmp->bits);
00150    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00151    if (enhancement)
00152       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00153 
00154    return 0;
00155 }
00156 
00157 /*! \brief convert and store into outbuf */
00158 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00159 {
00160    struct speex_coder_pvt *tmp = pvt->pvt;
00161 
00162    /* Assuming there's space left, decode into the current buffer at
00163       the tail location.  Read in as many frames as there are */
00164    int x;
00165    int res;
00166    int16_t *dst = pvt->outbuf.i16;
00167    /* XXX fout is a temporary buffer, may have different types */
00168 #ifdef _SPEEX_TYPES_H
00169    spx_int16_t fout[1024];
00170 #else
00171    float fout[1024];
00172 #endif
00173 
00174    if (f->datalen == 0) {  /* Native PLC interpolation */
00175       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00176          ast_log(LOG_WARNING, "Out of buffer space\n");
00177          return -1;
00178       }
00179 #ifdef _SPEEX_TYPES_H
00180       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00181 #else
00182       speex_decode(tmp->speex, NULL, fout);
00183       for (x=0;x<tmp->framesize;x++) {
00184          dst[pvt->samples + x] = (int16_t)fout[x];
00185       }
00186 #endif
00187       pvt->samples += tmp->framesize;
00188       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00189       return 0;
00190    }
00191 
00192    /* Read in bits */
00193    speex_bits_read_from(&tmp->bits, f->data.ptr, f->datalen);
00194    for (;;) {
00195 #ifdef _SPEEX_TYPES_H
00196       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00197 #else
00198       res = speex_decode(tmp->speex, &tmp->bits, fout);
00199 #endif
00200       if (res < 0)
00201          break;
00202       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00203          ast_log(LOG_WARNING, "Out of buffer space\n");
00204          return -1;
00205       }
00206       for (x = 0 ; x < tmp->framesize; x++)
00207          dst[pvt->samples + x] = (int16_t)fout[x];
00208       pvt->samples += tmp->framesize;
00209       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00210    }
00211    return 0;
00212 }
00213 
00214 /*! \brief store input frame in work buffer */
00215 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00216 {
00217    struct speex_coder_pvt *tmp = pvt->pvt;
00218 
00219    /* XXX We should look at how old the rest of our stream is, and if it
00220       is too old, then we should overwrite it entirely, otherwise we can
00221       get artifacts of earlier talk that do not belong */
00222    memcpy(tmp->buf + pvt->samples, f->data.ptr, f->datalen);
00223    pvt->samples += f->samples;
00224    return 0;
00225 }
00226 
00227 /*! \brief convert work buffer and produce output frame */
00228 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00229 {
00230    struct speex_coder_pvt *tmp = pvt->pvt;
00231    int is_speech=1;
00232    int datalen = 0;  /* output bytes */
00233    int samples = 0;  /* output samples */
00234 
00235    /* We can't work on anything less than a frame in size */
00236    if (pvt->samples < tmp->framesize)
00237       return NULL;
00238    speex_bits_reset(&tmp->bits);
00239    while (pvt->samples >= tmp->framesize) {
00240 #ifdef _SPEEX_TYPES_H
00241       /* Preprocess audio */
00242       if (preproc)
00243          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00244       /* Encode a frame of data */
00245       if (is_speech) {
00246          /* If DTX enabled speex_encode returns 0 during silence */
00247          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00248       } else {
00249          /* 5 zeros interpreted by Speex as silence (submode 0) */
00250          speex_bits_pack(&tmp->bits, 0, 5);
00251       }
00252 #else
00253       {
00254          float fbuf[1024];
00255          int x;
00256          /* Convert to floating point */
00257          for (x = 0; x < tmp->framesize; x++)
00258             fbuf[x] = tmp->buf[samples + x];
00259          /* Encode a frame of data */
00260          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00261       }
00262 #endif
00263       samples += tmp->framesize;
00264       pvt->samples -= tmp->framesize;
00265    }
00266 
00267    /* Move the data at the end of the buffer to the front */
00268    if (pvt->samples)
00269       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00270 
00271    /* Use AST_FRAME_CNG to signify the start of any silence period */
00272    if (is_speech) {
00273       tmp->silent_state = 0;
00274    } else {
00275       if (tmp->silent_state) {
00276          return NULL;
00277       } else {
00278          tmp->silent_state = 1;
00279          speex_bits_reset(&tmp->bits);
00280          memset(&pvt->f, 0, sizeof(pvt->f));
00281          pvt->f.frametype = AST_FRAME_CNG;
00282          pvt->f.samples = samples;
00283          /* XXX what now ? format etc... */
00284       }
00285    }
00286 
00287    /* Terminate bit stream */
00288    speex_bits_pack(&tmp->bits, 15, 5);
00289    datalen = speex_bits_write(&tmp->bits, pvt->outbuf.c, pvt->t->buf_size);
00290    return ast_trans_frameout(pvt, datalen, samples);
00291 }
00292 
00293 static void speextolin_destroy(struct ast_trans_pvt *arg)
00294 {
00295    struct speex_coder_pvt *pvt = arg->pvt;
00296 
00297    speex_decoder_destroy(pvt->speex);
00298    speex_bits_destroy(&pvt->bits);
00299 }
00300 
00301 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00302 {
00303    struct speex_coder_pvt *pvt = arg->pvt;
00304 #ifdef _SPEEX_TYPES_H
00305    if (preproc)
00306       speex_preprocess_state_destroy(pvt->pp);
00307 #endif
00308    speex_encoder_destroy(pvt->speex);
00309    speex_bits_destroy(&pvt->bits);
00310 }
00311 
00312 static struct ast_translator speextolin = {
00313    .name = "speextolin", 
00314    .srcfmt = AST_FORMAT_SPEEX,
00315    .dstfmt =  AST_FORMAT_SLINEAR,
00316    .newpvt = speextolin_new,
00317    .framein = speextolin_framein,
00318    .destroy = speextolin_destroy,
00319    .sample = speex_sample,
00320    .desc_size = sizeof(struct speex_coder_pvt),
00321    .buffer_samples = BUFFER_SAMPLES,
00322    .buf_size = BUFFER_SAMPLES * 2,
00323    .native_plc = 1,
00324 };
00325 
00326 static struct ast_translator lintospeex = {
00327    .name = "lintospeex", 
00328    .srcfmt = AST_FORMAT_SLINEAR,
00329    .dstfmt = AST_FORMAT_SPEEX,
00330    .newpvt = lintospeex_new,
00331    .framein = lintospeex_framein,
00332    .frameout = lintospeex_frameout,
00333    .destroy = lintospeex_destroy,
00334    .sample = slin8_sample,
00335    .desc_size = sizeof(struct speex_coder_pvt),
00336    .buffer_samples = BUFFER_SAMPLES,
00337    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00338 };
00339 
00340 static int parse_config(int reload) 
00341 {
00342    struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
00343    struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
00344    struct ast_variable *var;
00345    int res;
00346    float res_f;
00347 
00348    if (cfg == CONFIG_STATUS_FILEMISSING || cfg == CONFIG_STATUS_FILEUNCHANGED || cfg == CONFIG_STATUS_FILEINVALID)
00349       return 0;
00350 
00351    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00352       if (!strcasecmp(var->name, "quality")) {
00353          res = abs(atoi(var->value));
00354          if (res > -1 && res < 11) {
00355             ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
00356             quality = res;
00357          } else 
00358             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00359       } else if (!strcasecmp(var->name, "complexity")) {
00360          res = abs(atoi(var->value));
00361          if (res > -1 && res < 11) {
00362             ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
00363             complexity = res;
00364          } else 
00365             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00366       } else if (!strcasecmp(var->name, "vbr_quality")) {
00367          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00368             ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00369             vbr_quality = res_f;
00370          } else
00371             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00372       } else if (!strcasecmp(var->name, "abr_quality")) {
00373          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00374       } else if (!strcasecmp(var->name, "enhancement")) {
00375          enhancement = ast_true(var->value) ? 1 : 0;
00376          ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00377       } else if (!strcasecmp(var->name, "vbr")) {
00378          vbr = ast_true(var->value) ? 1 : 0;
00379          ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00380       } else if (!strcasecmp(var->name, "abr")) {
00381          res = abs(atoi(var->value));
00382          if (res >= 0) {
00383                if (res > 0)
00384                ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00385                else
00386                ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
00387             abr = res;
00388          } else 
00389             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00390       } else if (!strcasecmp(var->name, "vad")) {
00391          vad = ast_true(var->value) ? 1 : 0;
00392          ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00393       } else if (!strcasecmp(var->name, "dtx")) {
00394          dtx = ast_true(var->value) ? 1 : 0;
00395          ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00396       } else if (!strcasecmp(var->name, "preprocess")) {
00397          preproc = ast_true(var->value) ? 1 : 0;
00398          ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00399       } else if (!strcasecmp(var->name, "pp_vad")) {
00400          pp_vad = ast_true(var->value) ? 1 : 0;
00401          ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00402       } else if (!strcasecmp(var->name, "pp_agc")) {
00403          pp_agc = ast_true(var->value) ? 1 : 0;
00404          ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00405       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00406          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00407             ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00408             pp_agc_level = res_f;
00409          } else
00410             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00411       } else if (!strcasecmp(var->name, "pp_denoise")) {
00412          pp_denoise = ast_true(var->value) ? 1 : 0;
00413          ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00414       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00415          pp_dereverb = ast_true(var->value) ? 1 : 0;
00416          ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00417       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00418          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00419             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00420             pp_dereverb_decay = res_f;
00421          } else
00422             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00423       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00424          if (sscanf(var->value, "%30f", &res_f) == 1 && res_f >= 0) {
00425             ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00426             pp_dereverb_level = res_f;
00427          } else
00428             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00429       }
00430    }
00431    ast_config_destroy(cfg);
00432    return 0;
00433 }
00434 
00435 static int reload(void) 
00436 {
00437    if (parse_config(1))
00438       return AST_MODULE_LOAD_DECLINE;
00439    return AST_MODULE_LOAD_SUCCESS;
00440 }
00441 
00442 static int unload_module(void)
00443 {
00444    int res;
00445 
00446    res = ast_unregister_translator(&lintospeex);
00447    res |= ast_unregister_translator(&speextolin);
00448 
00449    return res;
00450 }
00451 
00452 static int load_module(void)
00453 {
00454    int res;
00455 
00456    if (parse_config(0))
00457       return AST_MODULE_LOAD_DECLINE;
00458    res=ast_register_translator(&speextolin);
00459    if (!res) 
00460       res=ast_register_translator(&lintospeex);
00461    else
00462       ast_unregister_translator(&speextolin);
00463    if (res)
00464       return AST_MODULE_LOAD_FAILURE;
00465    return AST_MODULE_LOAD_SUCCESS;
00466 }
00467 
00468 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00469       .load = load_module,
00470       .unload = unload_module,
00471       .reload = reload,
00472           );