/* SPDX-License-Identifier: GPL-3.0-or-later
 * Copyright © 2016-2025 The TokTok team.
 * Copyright © 2013-2015 Tox project.
 */
#include "audio.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "rtp.h"

#include "../toxcore/ccompat.h"
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/network.h"

static struct JitterBuffer *jbuf_new(uint32_t capacity);
static void jbuf_clear(struct JitterBuffer *q);
static void jbuf_free(struct JitterBuffer *q);
static int jbuf_write(const Logger *log, struct JitterBuffer *q, struct RTPMessage *m);
static struct RTPMessage *jbuf_read(struct JitterBuffer *q, int32_t *success);
static OpusEncoder *create_audio_encoder(const Logger *log, uint32_t bit_rate, uint32_t sampling_rate,
        uint8_t channel_count);
static bool reconfigure_audio_encoder(const Logger *log, OpusEncoder **e, uint32_t new_br, uint32_t new_sr,
                                      uint8_t new_ch, uint32_t *old_br, uint32_t *old_sr, uint8_t *old_ch);
static bool reconfigure_audio_decoder(ACSession *ac, uint32_t sampling_rate, uint8_t channels);

/**
 * Allocate and initialise an audio codec session.
 *
 * Creates the queue mutex, an Opus decoder, the jitter buffer and an Opus
 * encoder (in that order), all with the compile-time start parameters, and
 * stores the callback that ac_iterate() invokes for every decoded frame.
 *
 * @param mono_time Clock used to rate-limit decoder reconfiguration.
 * @param log Logger used by this session.
 * @param av Passed back verbatim as first argument of `cb`.
 * @param friend_number Passed back verbatim as second argument of `cb`.
 * @param cb Callback receiving decoded PCM frames; may be nullptr.
 * @param cb_data Passed back verbatim as last argument of `cb`.
 *
 * @return the new session, or nullptr if any step failed. On failure every
 *   resource acquired so far is released again.
 */
ACSession *ac_new(Mono_Time *mono_time, const Logger *log, ToxAV *av, uint32_t friend_number,
                  toxav_audio_receive_frame_cb *cb, void *cb_data)
{
    ACSession *ac = (ACSession *)calloc(1, sizeof(ACSession));

    if (ac == nullptr) {
        LOGGER_WARNING(log, "Allocation failed! Application might misbehave!");
        return nullptr;
    }

    if (create_recursive_mutex(ac->queue_mutex) != 0) {
        LOGGER_WARNING(log, "Failed to create recursive mutex!");
        free(ac);
        return nullptr;
    }

    int status = OPUS_OK;
    ac->decoder = opus_decoder_create(AUDIO_DECODER_START_SAMPLE_RATE, AUDIO_DECODER_START_CHANNEL_COUNT, &status);

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while starting audio decoder: %s", opus_strerror(status));
        goto BASE_CLEANUP;
    }

    ac->j_buf = jbuf_new(AUDIO_JITTERBUFFER_COUNT);

    if (ac->j_buf == nullptr) {
        LOGGER_WARNING(log, "Jitter buffer creaton failed!");
        /* No jitter buffer to free yet, so only the decoder is undone here. */
        opus_decoder_destroy(ac->decoder);
        goto BASE_CLEANUP;
    }

    ac->mono_time = mono_time;
    ac->log = log;

    /* Initialize encoders with default values */
    ac->encoder = create_audio_encoder(log, AUDIO_START_BITRATE, AUDIO_START_SAMPLE_RATE, AUDIO_START_CHANNEL_COUNT);

    if (ac->encoder == nullptr) {
        goto DECODER_CLEANUP;
    }

    ac->le_bit_rate = AUDIO_START_BITRATE;
    ac->le_sample_rate = AUDIO_START_SAMPLE_RATE;
    ac->le_channel_count = AUDIO_START_CHANNEL_COUNT;

    ac->ld_channel_count = AUDIO_DECODER_START_CHANNEL_COUNT;
    ac->ld_sample_rate = AUDIO_DECODER_START_SAMPLE_RATE;
    ac->ldrts = 0; /* Make it possible to reconfigure straight away */

    /* These need to be set in order to properly
     * do error correction with opus */
    ac->lp_frame_duration = AUDIO_MAX_FRAME_DURATION_MS;
    ac->lp_sampling_rate = AUDIO_DECODER_START_SAMPLE_RATE;
    ac->lp_channel_count = AUDIO_DECODER_START_CHANNEL_COUNT;

    ac->av = av;
    ac->friend_number = friend_number;
    ac->acb = cb;
    ac->acb_user_data = cb_data;

    return ac;

DECODER_CLEANUP:
    opus_decoder_destroy(ac->decoder);
    jbuf_free((struct JitterBuffer *)ac->j_buf);
BASE_CLEANUP:
    pthread_mutex_destroy(ac->queue_mutex);
    free(ac);
    return nullptr;
}

/**
 * Destroy a session created by ac_new().
 *
 * Releases the encoder, the decoder, the jitter buffer (including any
 * messages still queued in it), the mutex and the session itself.
 * Passing nullptr is a no-op.
 */
void ac_kill(ACSession *ac)
{
    if (ac == nullptr) {
        return;
    }

    opus_encoder_destroy(ac->encoder);
    opus_decoder_destroy(ac->decoder);
    jbuf_free((struct JitterBuffer *)ac->j_buf);

    pthread_mutex_destroy(ac->queue_mutex);

    LOGGER_DEBUG(ac->log, "Terminated audio handler: %p", (void *)ac);
    free(ac);
}

/**
 * Decode at most one frame from the jitter buffer and hand it to the
 * receive callback.
 *
 * The loop below only repeats when a message had to be dropped (too short,
 * or the decoder could not be reconfigured for it); as soon as one decode
 * attempt was made, successful or not, the function returns.
 *
 * When the jitter buffer reports a missing frame, Opus packet loss
 * concealment is used, sized from the parameters of the last accepted packet.
 *
 * Passing nullptr is a no-op.
 */
void ac_iterate(ACSession *ac)
{
    if (ac == nullptr) {
        return;
    }

    /* TODO: fix this and jitter buffering */

    /* Enough space for the maximum frame size (120 ms 48 KHz stereo audio) */
    int16_t *temp_audio_buffer = (int16_t *)malloc(AUDIO_MAX_BUFFER_SIZE_PCM16 * AUDIO_MAX_CHANNEL_COUNT * sizeof(
                                     int16_t));

    if (temp_audio_buffer == nullptr) {
        LOGGER_ERROR(ac->log, "Failed to allocate memory for audio buffer");
        return;
    }

    pthread_mutex_lock(ac->queue_mutex);
    struct JitterBuffer *const j_buf = (struct JitterBuffer *)ac->j_buf;

    int rc = 0;

    /* Invariant: the queue mutex is held whenever jbuf_read() runs, i.e. on
     * loop entry and after every `continue`. */
    for (struct RTPMessage *msg = jbuf_read(j_buf, &rc); msg != nullptr || rc == 2; msg = jbuf_read(j_buf, &rc)) {
        pthread_mutex_unlock(ac->queue_mutex);

        if (rc == 2) {
            /* A frame is missing: let the decoder conceal the loss. */
            LOGGER_DEBUG(ac->log, "OPUS correction");
            const int fs = (ac->lp_sampling_rate * ac->lp_frame_duration) / 1000;
            rc = opus_decode(ac->decoder, nullptr, 0, temp_audio_buffer, fs, 1);
        } else {
            /* The payload is a 4 byte sampling rate followed by the Opus
             * packet. The length comes from the network, so it is checked at
             * runtime rather than with assert(), which vanishes under NDEBUG. */
            if (msg->len <= 4) {
                LOGGER_WARNING(ac->log, "Audio packet too short: %u bytes", (unsigned int)msg->len);
                free(msg);
                pthread_mutex_lock(ac->queue_mutex);
                continue;
            }

            /* Pick up sampling rate from packet */
            uint32_t sampling_rate;
            memcpy(&sampling_rate, msg->data, sizeof(sampling_rate));
            sampling_rate = net_ntohl(sampling_rate);

            const uint8_t channel_count = (uint8_t)opus_packet_get_nb_channels(msg->data + 4);

            /** NOTE: even though OPUS supports decoding mono frames with stereo decoder and vice versa,
             * it didn't work quite well.
             */
            if (!reconfigure_audio_decoder(ac, sampling_rate, channel_count)) {
                LOGGER_WARNING(ac->log, "Failed to reconfigure decoder!");
                free(msg);
                pthread_mutex_lock(ac->queue_mutex);
                continue;
            }

            /* Only remember the packet's parameters once the decoder really
             * runs with them. The loss concealment branch above sizes its
             * output from these fields, so they must never describe a
             * configuration the decoder rejected. */
            ac->lp_sampling_rate = sampling_rate;
            ac->lp_channel_count = channel_count;

            /*
             * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0);
             *   where
             * packet is the byte array containing the compressed data
             * len is the exact number of bytes contained in the packet
             * decoded is the decoded audio data in opus_int16 (or float for opus_decode_float())
             * max_size is the max duration of the frame in samples (per channel) that can fit
             * into the decoded_frame array
             */
            rc = opus_decode(ac->decoder, msg->data + 4, msg->len - 4, temp_audio_buffer, 5760, 0);
            free(msg);
        }

        if (rc < 0) {
            LOGGER_WARNING(ac->log, "Decoding error: %s", opus_strerror(rc));
        } else if (ac->acb != nullptr) {
            /* rc is the number of decoded samples per channel. */
            ac->lp_frame_duration = (rc * 1000) / ac->lp_sampling_rate;

            ac->acb(ac->av, ac->friend_number, temp_audio_buffer, rc, ac->lp_channel_count,
                    ac->lp_sampling_rate, ac->acb_user_data);
        }

        free(temp_audio_buffer);
        return;
    }

    pthread_mutex_unlock(ac->queue_mutex);
    free(temp_audio_buffer);
}

/**
 * Queue a received RTP audio message for decoding.
 *
 * Ownership of `msg` is always taken: it is either stored in the jitter
 * buffer or freed before returning.
 *
 * @param mono_time Unused by this function.
 * @param cs The ACSession the message belongs to.
 * @param msg The message to queue.
 *
 * @retval 0 the message was queued, or it was a dummy packet and dropped.
 * @retval -1 `cs` or `msg` is nullptr, the payload type is not audio, or the
 *   jitter buffer refused the message.
 */
int ac_queue_message(const Mono_Time *mono_time, void *cs, struct RTPMessage *msg)
{
    ACSession *ac = (ACSession *)cs;

    if (ac == nullptr || msg == nullptr) {
        free(msg);
        return -1;
    }

    if ((msg->header.pt & 0x7f) == (RTP_TYPE_AUDIO + 2) % 128) {
        LOGGER_WARNING(ac->log, "Got dummy!");
        free(msg);
        return 0;
    }

    if ((msg->header.pt & 0x7f) != RTP_TYPE_AUDIO % 128) {
        LOGGER_WARNING(ac->log, "Invalid payload type!");
        free(msg);
        return -1;
    }

    pthread_mutex_lock(ac->queue_mutex);
    const int rc = jbuf_write(ac->log, (struct JitterBuffer *)ac->j_buf, msg);
    pthread_mutex_unlock(ac->queue_mutex);

    if (rc == -1) {
        LOGGER_WARNING(ac->log, "Could not queue the message!");
        free(msg);
        return -1;
    }

    return 0;
}

/**
 * Change bit rate, sampling rate and/or channel count of the encoder.
 *
 * @retval 0 the encoder now uses the requested parameters (or already did).
 * @retval -1 `ac` is nullptr or the encoder could not be reconfigured.
 */
int ac_reconfigure_encoder(ACSession *ac, uint32_t bit_rate, uint32_t sampling_rate, uint8_t channels)
{
    if (ac == nullptr || !reconfigure_audio_encoder(
                ac->log, &ac->encoder, bit_rate,
                sampling_rate, channels,
                &ac->le_bit_rate,
                &ac->le_sample_rate,
                &ac->le_channel_count)) {
        return -1;
    }

    return 0;
}

/**
 * Ring buffer of RTP messages indexed by sequence number.
 *
 * A message with sequence number `s` lives in `queue[s % size]`. `bottom` is
 * the sequence number to hand out next and `top` is one past the newest
 * sequence number stored. Both wrap around at 2^16 like the RTP sequence
 * number itself, so distances between them must be computed modulo 2^16.
 */
struct JitterBuffer {
    struct RTPMessage **queue; /* `size` slots; nullptr marks an empty slot */
    uint32_t size;             /* number of slots, a power of two */
    uint32_t capacity;         /* frames to wait for before declaring a gap lost */
    uint16_t bottom;
    uint16_t top;
};

/**
 * Create an empty jitter buffer.
 *
 * The slot count is the smallest power of two strictly greater than
 * `capacity * 4`.
 *
 * @return the new buffer, or nullptr if allocation failed.
 */
static struct JitterBuffer *jbuf_new(uint32_t capacity)
{
    unsigned int size = 1;

    while (size <= (capacity * 4)) {
        size *= 2;
    }

    struct JitterBuffer *q = (struct JitterBuffer *)calloc(1, sizeof(struct JitterBuffer));

    if (q == nullptr) {
        return nullptr;
    }

    q->queue = (struct RTPMessage **)calloc(size, sizeof(struct RTPMessage *));

    if (q->queue == nullptr) {
        free(q);
        return nullptr;
    }

    q->size = size;
    q->capacity = capacity;
    return q;
}

/** Free every message between `bottom` and `top` and leave the buffer empty. */
static void jbuf_clear(struct JitterBuffer *q)
{
    while (q->bottom != q->top) {
        free(q->queue[q->bottom % q->size]);
        q->queue[q->bottom % q->size] = nullptr;
        ++q->bottom;
    }
}

/** Free the buffer and all messages still queued in it. nullptr is a no-op. */
static void jbuf_free(struct JitterBuffer *q)
{
    if (q == nullptr) {
        return;
    }

    jbuf_clear(q);
    free(q->queue);
    free(q);
}

/*
 * Store message `m` in the slot selected by its sequence number.
 *
 * if -1 is returned the RTPMessage m needs to be free'd by the caller
 * if 0 is returned the RTPMessage m is stored in the ringbuffer and must NOT be freed by the caller
 */
static int jbuf_write(const Logger *log, struct JitterBuffer *q, struct RTPMessage *m)
{
    const uint16_t sequnum = m->header.sequnum;

    const unsigned int num = sequnum % q->size;

    /* How far ahead of the read position the message is, modulo 2^16.
     * The explicit narrowing matters: uint16_t operands are promoted to int,
     * so without it the difference is negative whenever the sequence number
     * has wrapped around and `bottom` has not yet. */
    const uint16_t offset = (uint16_t)(sequnum - q->bottom);

    if (offset > q->size) {
        /* Too far ahead, or behind the read position: start over around
         * this message. */
        LOGGER_DEBUG(log, "Clearing filled jitter buffer: %p", (void *)q);

        jbuf_clear(q);
        q->bottom = (uint16_t)(sequnum - q->capacity);
        q->queue[num] = m;
        q->top = (uint16_t)(sequnum + 1);
        return 0;
    }

    if (q->queue[num] != nullptr) {
        /* Slot already taken. */
        return -1;
    }

    q->queue[num] = m;

    /* Advance `top` only if this message is at or beyond it. */
    if (offset >= (uint16_t)(q->top - q->bottom)) {
        q->top = (uint16_t)(sequnum + 1);
    }

    return 0;
}

/**
 * Take the next message out of the buffer.
 *
 * @param success Set to
 *   - 1 when a message is returned,
 *   - 2 when the next message is missing and more than `capacity` sequence
 *     numbers are pending, so the gap is skipped and treated as a lost frame,
 *   - 0 when there is nothing to do yet (buffer empty, or still waiting for
 *     the missing message).
 *
 * @return the message when `*success == 1` (the caller must free it),
 *   nullptr otherwise.
 */
static struct RTPMessage *jbuf_read(struct JitterBuffer *q, int32_t *success)
{
    if (q->top == q->bottom) {
        *success = 0;
        return nullptr;
    }

    const unsigned int num = q->bottom % q->size;

    if (q->queue[num] != nullptr) {
        struct RTPMessage *ret = q->queue[num];
        q->queue[num] = nullptr;
        ++q->bottom;
        *success = 1;
        return ret;
    }

    /* Number of pending sequence numbers, modulo 2^16 so that it stays
     * correct while `top` has wrapped around and `bottom` has not. */
    if ((uint16_t)(q->top - q->bottom) > q->capacity) {
        ++q->bottom;
        *success = 2;
        return nullptr;
    }

    *success = 0;
    return nullptr;
}

/**
 * Create an Opus encoder in VoIP mode and apply the session-wide settings:
 * the given bit rate, constant bit rate, in-band FEC, the expected packet
 * loss percentage and the complexity.
 *
 * @return the configured encoder, or nullptr if creation or any setting
 *   failed (a partially configured encoder is destroyed).
 */
static OpusEncoder *create_audio_encoder(const Logger *log, uint32_t bit_rate, uint32_t sampling_rate,
        uint8_t channel_count)
{
    int status = OPUS_OK;
    /*
     * OPUS_APPLICATION_VOIP Process signal for improved speech intelligibility
     * OPUS_APPLICATION_AUDIO Favor faithfulness to the original input
     * OPUS_APPLICATION_RESTRICTED_LOWDELAY Configure the minimum possible coding delay
     */
    OpusEncoder *rc = opus_encoder_create(sampling_rate, channel_count, OPUS_APPLICATION_VOIP, &status);

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while starting audio encoder: %s", opus_strerror(status));
        return nullptr;
    }

    /*
     * Rates from 500 to 512000 bits per second are meaningful as well as the special
     * values OPUS_BITRATE_AUTO and OPUS_BITRATE_MAX. The value OPUS_BITRATE_MAX can
     * be used to cause the codec to use as much rate as it can, which is useful for
     * controlling the rate by adjusting the output buffer size.
     *
     * Parameters:
     *   `[in]` `x` `opus_int32`: bitrate in bits per second.
     */
    status = opus_encoder_ctl(rc, OPUS_SET_BITRATE(bit_rate));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        goto FAILURE;
    }

    /*
     * The libopus library defaults to VBR, which is unsafe in any VoIP environment
     * (see for example doi:10.1109/SP.2011.34). Switching to CBR very slightly
     * decreases audio quality at lower bitrates.
     *
     * Parameters:
     *   `[in]` `x` `opus_int32`: Whether to use VBR mode, 1 (VBR) is default
     */
    status = opus_encoder_ctl(rc, OPUS_SET_VBR(0));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        goto FAILURE;
    }

    /*
     * Configures the encoder's use of inband forward error correction.
     * Note:
     *   This is only applicable to the LPC layer
     * Parameters:
     *   `[in]` `x` `int`: FEC flag, 0 (disabled) is default
     */
    /* Enable in-band forward error correction in codec */
    status = opus_encoder_ctl(rc, OPUS_SET_INBAND_FEC(1));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        goto FAILURE;
    }

    /*
     * Configures the encoder's expected packet loss percentage.
     * Higher values will trigger progressively more loss resistant behavior in
     * the encoder at the expense of quality at a given bitrate in the lossless case,
     * but greater quality under loss.
     * Parameters:
     *     `[in]` `x` `int`: Loss percentage in the range 0-100, inclusive.
     */
    /* Make codec resistant to up to 10% packet loss
     * NOTE This could also be adjusted on the fly, rather than hard-coded,
     *      with feedback from the receiving client.
     */
    status = opus_encoder_ctl(rc, OPUS_SET_PACKET_LOSS_PERC(AUDIO_OPUS_PACKET_LOSS_PERC));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        goto FAILURE;
    }

    /*
     * Configures the encoder's computational complexity.
     *
     * The supported range is 0-10 inclusive with 10 representing the highest complexity.
     * The default value is 10.
     *
     * Parameters:
     *   `[in]` `x` `int`: 0-10, inclusive
     */
    /* Set algorithm to the highest complexity, maximizing compression */
    status = opus_encoder_ctl(rc, OPUS_SET_COMPLEXITY(AUDIO_OPUS_COMPLEXITY));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        goto FAILURE;
    }

    return rc;

FAILURE:
    opus_encoder_destroy(rc);
    return nullptr;
}

/**
 * Bring the encoder `*e` to the requested parameters.
 *
 * A change of sampling rate or channel count replaces the encoder with a
 * freshly created one; a change of bit rate alone is applied to the existing
 * encoder. If nothing changed the function returns true without touching
 * anything.
 *
 * @param old_br,old_sr,old_ch Parameters the encoder currently uses; updated
 *   to the new values on success.
 *
 * @return true on success, false if the new encoder could not be created or
 *   the bit rate could not be set.
 */
static bool reconfigure_audio_encoder(const Logger *log, OpusEncoder **e, uint32_t new_br, uint32_t new_sr,
                                      uint8_t new_ch, uint32_t *old_br, uint32_t *old_sr, uint8_t *old_ch)
{
    /* Values are checked in toxav.c */
    if (*old_sr != new_sr || *old_ch != new_ch) {
        /* Create the replacement first so that the old encoder survives a failure. */
        OpusEncoder *new_encoder = create_audio_encoder(log, new_br, new_sr, new_ch);

        if (new_encoder == nullptr) {
            return false;
        }

        opus_encoder_destroy(*e);
        *e = new_encoder;
    } else if (*old_br == new_br) {
        return true; /* Nothing changed */
    }

    const int status = opus_encoder_ctl(*e, OPUS_SET_BITRATE(new_br));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        return false;
    }

    *old_br = new_br;
    *old_sr = new_sr;
    *old_ch = new_ch;

    LOGGER_DEBUG(log, "Reconfigured audio encoder br: %d sr: %d cc:%d", new_br, new_sr, new_ch);
    return true;
}

/**
 * Make sure the decoder matches the given sampling rate and channel count.
 *
 * If the decoder already uses these parameters nothing happens. Otherwise a
 * new decoder replaces the current one, but not sooner than 500 time units of
 * current_time_monotonic() after the previous replacement.
 *
 * @return true if the decoder now matches the parameters, false if the
 *   change was refused by the rate limit or the new decoder could not be
 *   created (the current decoder is kept in both cases).
 */
static bool reconfigure_audio_decoder(ACSession *ac, uint32_t sampling_rate, uint8_t channels)
{
    if (sampling_rate != ac->ld_sample_rate || channels != ac->ld_channel_count) {
        if (current_time_monotonic(ac->mono_time) - ac->ldrts < 500) {
            return false;
        }

        int status = OPUS_OK;
        OpusDecoder *new_dec = opus_decoder_create(sampling_rate, channels, &status);

        if (status != OPUS_OK) {
            LOGGER_ERROR(ac->log, "Error while starting audio decoder(%d %d): %s", sampling_rate, channels,
                         opus_strerror(status));
            return false;
        }

        ac->ld_sample_rate = sampling_rate;
        ac->ld_channel_count = channels;
        ac->ldrts = current_time_monotonic(ac->mono_time);

        opus_decoder_destroy(ac->decoder);
        ac->decoder = new_dec;

        LOGGER_DEBUG(ac->log, "Reconfigured audio decoder sr: %d cc: %d", sampling_rate, channels);
    }

    return true;
}