Files
tomato/toxav/rtp.c
Green Sky e95f2cbb1c Squashed 'external/toxcore/c-toxcore/' changes from e58eb27a8..1828c5356
1828c5356 fix(toxav): remove extra copy of video frame on encode
b66b8ded6 refactor: improve group stability, moderation determinism, and DHT dual-stack handling
4fbd7c10a fix(toxav): fix heap buffer overflow in RTP video packet handling
809fe8c78 refactor(tox): make the `#define` consts int literals.
50d242a37 refactor(toxav): improve MSI safety and testability
da1c13a2f fix(toxav): harden video processing and fix large frame handling
472825288 fix(toxav): fix multiple logic bugs in audio module
dc963d9a9 fix(toxav): fix multiple bugs in bandwidth controller and add tests
3bf5778ef refactor(toxav): split out RTP module and add exhaustive unit tests
b79b7d436 fix(autotools): add tox_log_level.h to public headers list
ea2e34ff2 chore: Disable cirrus. We're out of quota again.
b449ea2ed chore(ci): update azure runner image to windows-2022 windows-2019 is EOL
e115b136d refactor: Make add_to_list non-recursive.
REVERT: e58eb27a8 fix(toxav): remove extra copy of video frame on encode Tested and works, but there might be alignment issues and other stuff.

git-subtree-dir: external/toxcore/c-toxcore
git-subtree-split: 1828c5356b2daf1d5f680854e776d74b181d268c
2026-01-01 19:15:15 +01:00

1045 lines
36 KiB
C

/* SPDX-License-Identifier: GPL-3.0-or-later
* Copyright © 2016-2025 The TokTok team.
* Copyright © 2013-2015 Tox project.
*/
#include "rtp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <sodium.h>
#include "../toxcore/ccompat.h"
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/net_crypto.h"
#include "../toxcore/network.h"
#include "../toxcore/util.h"
/**
* Maximum size of a single RTP frame in bytes.
* This limit prevents memory exhaustion attacks where a malicious peer sends
* a header indicating a very large frame size, causing the receiver to allocate
* excessive memory.
*/
#define MAX_RTP_FRAME_SIZE (32 * 1024 * 1024)
struct RTPHeader {
/* Standard RTP header */
unsigned ve: 2; /* Version has only 2 bits! */
unsigned pe: 1; /* Padding */
unsigned xe: 1; /* Extra header */
unsigned cc: 4; /* Contributing sources count */
unsigned ma: 1; /* Marker */
unsigned pt: 7; /* Payload type */
uint16_t sequnum;
uint32_t timestamp;
uint32_t ssrc;
/* Non-standard Tox-specific fields */
/**
* Bit mask of `RTPFlags` setting features of the current frame.
*/
uint64_t flags;
/**
* The full 32 bit data offset of the current data chunk. The
* @ref offset_lower data member contains the lower 16 bits of this value.
* For frames smaller than 64KiB, @ref offset_full and @ref offset_lower are
* equal.
*/
uint32_t offset_full;
/**
* The full 32 bit payload length without header and packet id.
*/
uint32_t data_length_full;
/**
* Only the receiver uses this field (why do we have this?).
*/
uint32_t received_length_full;
/**
* Data offset of the current part (lower bits).
*/
uint16_t offset_lower;
/**
* Total message length (lower bits).
*/
uint16_t data_length_lower;
};
struct RTPMessage {
/**
* This is used in the old code that doesn't deal with large frames, i.e.
* the audio code or receiving code for old 16 bit messages. We use it to
* record the number of bytes received so far in a multi-part message. The
* multi-part message in the old code is stored in `RTPSession::mp`.
*/
uint32_t len;
struct RTPHeader header;
uint8_t data[];
};
/**
* One slot in the work buffer list. Represents one frame that is currently
* being assembled.
*/
struct RTPWorkBuffer {
/**
* Whether this slot contains a key frame. This is true iff
* `buf->header.flags & RTP_KEY_FRAME`.
*/
bool is_keyframe;
/**
* The number of bytes received so far, regardless of which pieces. I.e. we
* could have received the first 1000 bytes and the last 1000 bytes with
* 4000 bytes in the middle still to come, and this number would be 2000.
*/
uint32_t received_len;
/**
* The message currently being assembled.
*/
struct RTPMessage *buf;
};
struct RTPWorkBufferList {
int8_t next_free_entry;
struct RTPWorkBuffer work_buffer[USED_RTP_WORKBUFFER_COUNT];
};
/**
* RTP control session.
*/
struct RTPSession {
uint8_t payload_type;
uint16_t sequnum; /* Sending sequence number */
uint16_t rsequnum; /* Receiving sequence number */
uint32_t rtimestamp;
uint32_t ssrc; // this seems to be unused!?
struct RTPMessage *mp; /* Expected parted message */
struct RTPWorkBufferList *work_buffer_list;
uint8_t first_packets_counter; /* dismiss first few lost video packets */
const Logger *log;
Mono_Time *mono_time;
bool rtp_receive_active; /* if this is set to false then incoming rtp packets will not be processed by rtp_receive_packet() */
rtp_send_packet_cb *send_packet;
void *send_packet_user_data;
rtp_add_recv_cb *add_recv;
rtp_add_lost_cb *add_lost;
void *bwc_user_data;
void *cs;
rtp_m_cb *mcb;
};
const uint8_t *rtp_message_data(const RTPMessage *msg)
{
return msg->data;
}
uint32_t rtp_message_len(const RTPMessage *msg)
{
return msg->len;
}
uint8_t rtp_message_pt(const RTPMessage *msg)
{
return msg->header.pt;
}
uint16_t rtp_message_sequnum(const RTPMessage *msg)
{
return msg->header.sequnum;
}
uint64_t rtp_message_flags(const RTPMessage *msg)
{
return msg->header.flags;
}
uint32_t rtp_message_data_length_full(const RTPMessage *msg)
{
return msg->header.data_length_full;
}
bool rtp_session_is_receiving_active(const RTPSession *session)
{
return session->rtp_receive_active;
}
uint32_t rtp_session_get_ssrc(const RTPSession *session)
{
return session->ssrc;
}
void rtp_session_set_ssrc(RTPSession *session, uint32_t ssrc)
{
session->ssrc = ssrc;
}
/**
* The number of milliseconds we want to keep a keyframe in the buffer for,
* even though there are no free slots for incoming frames.
*/
#define VIDEO_KEEP_KEYFRAME_IN_BUFFER_FOR_MS 15
// allocate_len is NOT including header!
static struct RTPMessage *new_message(const Logger *log, const struct RTPHeader *header, size_t allocate_len,
const uint8_t *data, uint16_t data_length)
{
if (allocate_len < data_length) {
LOGGER_WARNING(log, "new_message: allocate_len (%zu) < data_length (%u)", allocate_len, data_length);
return nullptr;
}
struct RTPMessage *msg = (struct RTPMessage *)calloc(1, sizeof(struct RTPMessage) + allocate_len);
if (msg == nullptr) {
LOGGER_WARNING(log, "Could not allocate RTPMessage buffer");
return nullptr;
}
msg->len = data_length; // result without header
msg->header = *header;
memcpy(msg->data, data, msg->len);
return msg;
}
/**
* Instruct the caller to clear slot 0.
*/
#define GET_SLOT_RESULT_DROP_OLDEST_SLOT (-1)
/**
* Instruct the caller to drop the incoming packet.
*/
#define GET_SLOT_RESULT_DROP_INCOMING (-2)
/**
* Find the next free slot in work_buffer for the incoming data packet.
*
* - If the data packet belongs to a frame that's already in the work_buffer then
* use that slot.
* - If there is no free slot return GET_SLOT_RESULT_DROP_OLDEST_SLOT.
* - If the data packet is too old return GET_SLOT_RESULT_DROP_INCOMING.
*
* If there is a keyframe being assembled in slot 0, keep it a bit longer and
* do not kick it out right away if all slots are full instead kick out the new
* incoming interframe.
*/
static int8_t get_slot(const Logger *log, struct RTPWorkBufferList *wkbl, bool is_keyframe,
const struct RTPHeader *header, bool is_multipart)
{
if (is_multipart) {
// This RTP message is part of a multipart frame, so we try to find an
// existing slot with the previous parts of the frame in it.
for (uint8_t i = 0; i < wkbl->next_free_entry; ++i) {
const struct RTPWorkBuffer *slot = &wkbl->work_buffer[i];
if ((slot->buf->header.sequnum == header->sequnum) && (slot->buf->header.timestamp == header->timestamp)) {
// Sequence number and timestamp match, so this slot belongs to
// the same frame.
//
// In reality, these will almost certainly either both match or
// both not match. Only if somehow there were 65535 frames
// between, the timestamp will matter.
return i;
}
}
}
// The message may or may not be part of a multipart frame.
//
// If it is part of a multipart frame, then this is an entirely new frame
// for which we did not have a slot *or* the frame is so old that its slot
// has been evicted by now.
//
// |----------- time ----------->
// _________________
// slot 0 | |
// -----------------
// _________________
// slot 1 | |
// -----------------
// ____________
// slot 2 | | -> frame too old, drop
// ------------
//
//
//
// |----------- time ----------->
// _________________
// slot 0 | |
// -----------------
// _________________
// slot 1 | |
// -----------------
// ____________
// slot 2 | | -> ok, start filling in a new slot
// ------------
// If there is a free slot:
if (wkbl->next_free_entry < USED_RTP_WORKBUFFER_COUNT) {
// If there is at least one filled slot:
if (wkbl->next_free_entry > 0) {
// Get the most recently filled slot.
const struct RTPWorkBuffer *slot = &wkbl->work_buffer[wkbl->next_free_entry - 1];
// If the incoming packet is older than our newest slot, drop it.
// This is the first situation in the above diagram.
if (slot->buf->header.timestamp > header->timestamp) {
LOGGER_DEBUG(log, "workbuffer:2:timestamp too old");
return GET_SLOT_RESULT_DROP_INCOMING;
}
}
// Not all slots are filled, and the packet is newer than our most
// recent slot, so it's a new frame we want to start assembling. This is
// the second situation in the above diagram.
return wkbl->next_free_entry;
}
// If the incoming frame is a key frame, then stop assembling the oldest
// slot, regardless of whether there was a keyframe in that or not.
if (is_keyframe) {
return GET_SLOT_RESULT_DROP_OLDEST_SLOT;
}
// The incoming slot is not a key frame, so we look at slot 0 to see what to
// do next.
const struct RTPWorkBuffer *slot = &wkbl->work_buffer[0];
// The incoming frame is not a key frame, but the existing slot 0 is also
// not a keyframe, so we stop assembling the existing frame and make space
// for the new one.
if (!slot->is_keyframe) {
return GET_SLOT_RESULT_DROP_OLDEST_SLOT;
}
// If this key frame is fully received, we also stop assembling and clear
// slot 0. This also means sending the frame to the decoder.
if (slot->received_len == slot->buf->header.data_length_full) {
return GET_SLOT_RESULT_DROP_OLDEST_SLOT;
}
// This is a key frame, not fully received yet, but it's already much older
// than the incoming frame, so we stop assembling it and send whatever part
// we did receive to the decoder.
if (slot->buf->header.timestamp + VIDEO_KEEP_KEYFRAME_IN_BUFFER_FOR_MS <= header->timestamp) {
return GET_SLOT_RESULT_DROP_OLDEST_SLOT;
}
// This is a key frame, it's not too old yet, so we keep it in its slot for
// a little longer.
LOGGER_INFO(log, "keep KEYFRAME in workbuffer");
return GET_SLOT_RESULT_DROP_INCOMING;
}
/**
* Returns an assembled frame (as much data as we currently have for this frame,
* some pieces may be missing)
*
* If there are no frames ready, we return NULL. If this function returns
* non-NULL, it transfers ownership of the message to the caller, i.e. the
* caller is responsible for storing it elsewhere or calling `free()`.
*/
static struct RTPMessage *process_frame(const Logger *log, struct RTPWorkBufferList *wkbl, uint8_t slot_id)
{
assert(wkbl->next_free_entry >= 0);
if (wkbl->next_free_entry == 0) {
// There are no frames in any slot.
return nullptr;
}
// Slot 0 contains a key frame, slot_id points at an interframe that is
// relative to that key frame, so we don't use it yet.
if (wkbl->work_buffer[0].is_keyframe && slot_id != 0) {
LOGGER_DEBUG(log, "process_frame:KEYFRAME waiting in slot 0");
return nullptr;
}
// Either slot_id is 0 and slot 0 is a key frame, or there is no key frame
// in slot 0 (and slot_id is anything).
struct RTPWorkBuffer *const slot = &wkbl->work_buffer[slot_id];
// Move ownership of the frame out of the slot into m_new.
struct RTPMessage *msg = slot->buf;
msg->len = msg->header.data_length_full;
slot->buf = nullptr;
assert(wkbl->next_free_entry >= 1 && wkbl->next_free_entry <= USED_RTP_WORKBUFFER_COUNT);
if (slot_id != wkbl->next_free_entry - 1) {
// The slot is not the last slot, so we created a gap. We move all the
// entries after it one step up.
for (uint8_t i = slot_id; i < wkbl->next_free_entry - 1; ++i) {
// Move entry (i+1) into entry (i).
wkbl->work_buffer[i] = wkbl->work_buffer[i + 1];
}
}
// We now have a free entry at the end of the array.
--wkbl->next_free_entry;
// Clear the newly freed entry.
const struct RTPWorkBuffer empty = {0};
wkbl->work_buffer[wkbl->next_free_entry] = empty;
// Move ownership of the frame to the caller.
return msg;
}
/**
* @param log A pointer to the Logger object.
* @param wkbl The list of in-progress frames, i.e. all the slots.
* @param slot_id The slot we want to fill the data into.
* @param is_keyframe Whether the data is part of a key frame.
* @param header The RTP header from the incoming packet.
* @param incoming_data The pure payload without header.
* @param incoming_data_length The length in bytes of the incoming data payload.
*/
static bool fill_data_into_slot(const Logger *log, struct RTPWorkBufferList *wkbl, const uint8_t slot_id,
bool is_keyframe, const struct RTPHeader *header,
const uint8_t *incoming_data, uint16_t incoming_data_length)
{
// We're either filling the data into an existing slot, or in a new one that
// is the next free entry.
assert(slot_id <= wkbl->next_free_entry);
struct RTPWorkBuffer *const slot = &wkbl->work_buffer[slot_id];
assert(header != nullptr);
assert(is_keyframe == (bool)((header->flags & RTP_KEY_FRAME) != 0));
if (slot->received_len == 0) {
assert(slot->buf == nullptr);
if (header->data_length_full > MAX_RTP_FRAME_SIZE) {
LOGGER_WARNING(log, "RTP frame too large: %u > %u", (unsigned)header->data_length_full, (unsigned)MAX_RTP_FRAME_SIZE);
return false;
}
// No data for this slot has been received, yet, so we create a new
// message for it with enough memory for the entire frame.
struct RTPMessage *msg = (struct RTPMessage *)calloc(1, sizeof(struct RTPMessage) + header->data_length_full);
if (msg == nullptr) {
LOGGER_ERROR(log, "Out of memory while trying to allocate for frame of size %u",
(unsigned)header->data_length_full);
// Out of memory: throw away the incoming data.
return false;
}
// Unused in the new video receiving code, as it's 16 bit and can't hold
// the full length of large frames. Instead, we use slot->received_len.
msg->len = 0;
msg->header = *header;
slot->buf = msg;
slot->is_keyframe = is_keyframe;
slot->received_len = 0;
assert(wkbl->next_free_entry < USED_RTP_WORKBUFFER_COUNT);
++wkbl->next_free_entry;
} else {
if (slot->buf->header.data_length_full != header->data_length_full) {
LOGGER_WARNING(log, "Received packet with different length than previous packets in same frame: %u != %u",
header->data_length_full, slot->buf->header.data_length_full);
return false;
}
}
// We already checked this when we received the packet, but we rely on it
// here, so assert again.
assert(header->offset_full < header->data_length_full);
if (header->data_length_full - header->offset_full < incoming_data_length) {
LOGGER_ERROR(log, "Packet too long for buffer: offset %u + len %u > total %u",
(unsigned)header->offset_full, (unsigned)incoming_data_length, (unsigned)header->data_length_full);
return false;
}
// Copy the incoming chunk of data into the correct position in the full
// frame data array.
memcpy(
slot->buf->data + header->offset_full,
incoming_data,
incoming_data_length
);
// Update the total received length of this slot.
slot->received_len += incoming_data_length;
// Update received length also in the header of the message, for later use.
slot->buf->header.received_length_full = slot->received_len;
return slot->received_len == header->data_length_full;
}
static void update_bwc_values(RTPSession *session, const struct RTPMessage *msg)
{
if (session->first_packets_counter < DISMISS_FIRST_LOST_VIDEO_PACKET_COUNT) {
++session->first_packets_counter;
} else {
const uint32_t data_length_full = msg->header.data_length_full; // without header
const uint32_t received_length_full = msg->header.received_length_full; // without header
if (session->add_recv) {
session->add_recv(session->bwc_user_data, data_length_full);
}
if (received_length_full < data_length_full) {
LOGGER_DEBUG(session->log, "BWC: full length=%u received length=%u", data_length_full, received_length_full);
if (session->add_lost) {
session->add_lost(session->bwc_user_data, data_length_full - received_length_full);
}
}
}
}
/**
* Handle a single RTP video packet.
*
* The packet may or may not be part of a multipart frame. This function will
* find out and handle it appropriately.
*
* @param session The current RTP session
* @param header The RTP header deserialised from the packet.
* @param incoming_data The packet data *not* header, i.e. this is the actual
* payload.
* @param incoming_data_length The packet length *not* including header, i.e.
* this is the actual payload length.
* @param log A logger.
*
* @retval -1 on error.
* @retval 0 on success.
*/
static int handle_video_packet(const Logger *log, RTPSession *session, const struct RTPHeader *header,
const uint8_t *incoming_data, uint16_t incoming_data_length)
{
// Full frame length in bytes. The frame may be split into multiple packets,
// but this value is the complete assembled frame size.
const uint32_t full_frame_length = header->data_length_full;
// The sender tells us whether this is a key frame.
const bool is_keyframe = (header->flags & RTP_KEY_FRAME) != 0;
LOGGER_DEBUG(log, "wkbl->next_free_entry:003=%d", session->work_buffer_list->next_free_entry);
const bool is_multipart = full_frame_length != incoming_data_length;
/* The message was sent in single part */
int8_t slot_id = get_slot(log, session->work_buffer_list, is_keyframe, header, is_multipart);
LOGGER_DEBUG(log, "slot num=%d", slot_id);
// get_slot told us to drop the packet, so we ignore it.
if (slot_id == GET_SLOT_RESULT_DROP_INCOMING) {
return -1;
}
// get_slot said there is no free slot.
if (slot_id == GET_SLOT_RESULT_DROP_OLDEST_SLOT) {
LOGGER_DEBUG(log, "there was no free slot, so we process the oldest frame");
// We now own the frame.
struct RTPMessage *m_new = process_frame(log, session->work_buffer_list, 0);
// The process_frame function returns NULL if there is no slot 0, i.e.
// the work buffer list is completely empty. It can't be empty, because
// get_slot just told us it's full, so process_frame must return non-null.
assert(m_new != nullptr);
if (m_new->len >= 2) {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a b0=%d b1=%d", (int)m_new->data[0],
(int)m_new->data[1]);
} else if (m_new->len == 1) {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a b0=%d", (int)m_new->data[0]);
} else {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a (empty)");
}
update_bwc_values(session, m_new);
// Pass ownership of m_new to the callback.
session->mcb(session->mono_time, session->cs, m_new);
// Now we no longer own m_new.
m_new = nullptr;
// Now we must have a free slot, so we either get that slot, i.e. >= 0,
// or get told to drop the incoming packet if it's too old.
slot_id = get_slot(log, session->work_buffer_list, is_keyframe, header, /* is_multipart */false);
if (slot_id == GET_SLOT_RESULT_DROP_INCOMING) {
// The incoming frame is too old, so we drop it.
return -1;
}
}
// We must have a valid slot here.
assert(slot_id >= 0);
LOGGER_DEBUG(log, "fill_data_into_slot.1");
// fill in this part into the slot buffer at the correct offset
if (!fill_data_into_slot(
log,
session->work_buffer_list,
slot_id,
is_keyframe,
header,
incoming_data,
incoming_data_length)) {
// Memory allocation failed. Return error.
return -1;
}
struct RTPMessage *m_new = process_frame(log, session->work_buffer_list, slot_id);
if (m_new != nullptr) {
if (m_new->len >= 2) {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a b0=%d b1=%d", (int)m_new->data[0],
(int)m_new->data[1]);
} else if (m_new->len == 1) {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a b0=%d", (int)m_new->data[0]);
} else {
LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a (empty)");
}
update_bwc_values(session, m_new);
session->mcb(session->mono_time, session->cs, m_new);
m_new = nullptr;
}
return 0;
}
/**
* receive custom lossypackets and process them. they can be incoming audio or video packets
*/
void rtp_receive_packet(RTPSession *session, const uint8_t *data, size_t length)
{
const Logger *log = session->log;
if (length < RTP_HEADER_SIZE + 1) {
LOGGER_WARNING(log, "Invalid length of received buffer!");
return;
}
// Get the packet type.
const uint8_t packet_type = data[0];
const uint8_t *payload = &data[1];
// TODO(Zoff): is this ok?
const uint16_t payload_size = (uint16_t)length - 1;
// Unpack the header.
struct RTPHeader header;
rtp_header_unpack(payload, &header);
if (header.pt != packet_type % 128) {
LOGGER_WARNING(log, "RTPHeader packet type and Tox protocol packet type did not agree: %d != %d",
header.pt, packet_type % 128);
return;
}
if (header.pt != session->payload_type % 128) {
LOGGER_WARNING(log, "RTPHeader packet type does not match this session's payload type: %d != %d",
header.pt, session->payload_type % 128);
return;
}
if ((header.flags & RTP_LARGE_FRAME) != 0 && header.offset_full >= header.data_length_full) {
LOGGER_ERROR(log, "Invalid video packet: frame offset (%u) >= full frame length (%u)",
(unsigned)header.offset_full, (unsigned)header.data_length_full);
return;
}
if (header.offset_lower >= header.data_length_lower) {
LOGGER_ERROR(log, "Invalid old protocol video packet: frame offset (%u) >= full frame length (%u)",
(unsigned)header.offset_lower, (unsigned)header.data_length_lower);
return;
}
LOGGER_DEBUG(log, "header.pt %d, video %d", (uint8_t)header.pt, RTP_TYPE_VIDEO % 128);
// The sender uses the new large-frame capable protocol and is sending a
// video packet.
if ((header.flags & RTP_LARGE_FRAME) != 0 && header.pt == (RTP_TYPE_VIDEO % 128)) {
handle_video_packet(log, session, &header, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE);
return;
}
// everything below here is for the old 16 bit protocol ------------------
if (header.data_length_lower == payload_size - RTP_HEADER_SIZE) {
/* The message is sent in single part */
/* Message is not late; pick up the latest parameters */
session->rsequnum = header.sequnum;
session->rtimestamp = header.timestamp;
if (session->add_recv) {
session->add_recv(session->bwc_user_data, payload_size);
}
/* Invoke processing of active multiparted message */
if (session->mp != nullptr) {
session->mcb(session->mono_time, session->cs, session->mp);
session->mp = nullptr;
}
/* The message came in the allowed time;
*/
session->mp = new_message(log, &header, payload_size - RTP_HEADER_SIZE, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE);
session->mcb(session->mono_time, session->cs, session->mp);
session->mp = nullptr;
return;
}
/* The message is sent in multiple parts */
if (session->mp != nullptr) {
/* There are 2 possible situations in this case:
* 1) being that we got the part of already processing message.
* 2) being that we got the part of a new/old message.
*
* We handle them differently as we only allow a single multiparted
* processing message
*/
if (session->mp->header.sequnum == header.sequnum &&
session->mp->header.timestamp == header.timestamp) {
/* First case */
/* Make sure we have enough allocated memory */
if (session->mp->header.data_length_lower - session->mp->len < payload_size - RTP_HEADER_SIZE ||
session->mp->header.data_length_lower <= header.offset_lower ||
session->mp->header.data_length_lower - header.offset_lower < payload_size - RTP_HEADER_SIZE) {
LOGGER_WARNING(log, "Corruption on the stream: multipart audio packet does not fit");
return;
}
memcpy(session->mp->data + header.offset_lower, &payload[RTP_HEADER_SIZE],
payload_size - RTP_HEADER_SIZE);
session->mp->len += payload_size - RTP_HEADER_SIZE;
if (session->add_recv) {
session->add_recv(session->bwc_user_data, payload_size);
}
if (session->mp->len == session->mp->header.data_length_lower) {
/* Received a full message; now push it for the further
* processing.
*/
session->mcb(session->mono_time, session->cs, session->mp);
session->mp = nullptr;
}
} else {
/* Second case */
if (session->mp->header.timestamp > header.timestamp) {
/* The received message part is from the old message;
* discard it.
*/
return;
}
/* Push the previous message for processing */
session->mcb(session->mono_time, session->cs, session->mp);
session->mp = nullptr;
goto NEW_MULTIPARTED;
}
} else {
/* In this case treat the message as if it was received in order
*/
/* This is also a point for new multiparted messages */
NEW_MULTIPARTED:
if (header.data_length_lower - header.offset_lower < payload_size - RTP_HEADER_SIZE) {
LOGGER_WARNING(log, "Packet too long for buffer: offset %u + len %u > total %u",
(unsigned)header.offset_lower, (unsigned)(payload_size - RTP_HEADER_SIZE),
(unsigned)header.data_length_lower);
return;
}
/* Message is not late; pick up the latest parameters */
session->rsequnum = header.sequnum;
session->rtimestamp = header.timestamp;
if (session->add_recv) {
session->add_recv(session->bwc_user_data, payload_size);
}
/* Store message.
*/
session->mp = new_message(log, &header, header.data_length_lower, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE);
if (session->mp != nullptr) {
memmove(session->mp->data + header.offset_lower, session->mp->data, session->mp->len);
} else {
LOGGER_WARNING(log, "new_message() returned a null pointer");
return;
}
}
return;
}
size_t rtp_header_pack(uint8_t *const rdata, const struct RTPHeader *header)
{
uint8_t *p = rdata;
*p = (header->ve & 3) << 6
| (header->pe & 1) << 5
| (header->xe & 1) << 4
| (header->cc & 0xf);
++p;
*p = (header->ma & 1) << 7
| (header->pt & 0x7f);
++p;
p += net_pack_u16(p, header->sequnum);
p += net_pack_u32(p, header->timestamp);
p += net_pack_u32(p, header->ssrc);
p += net_pack_u64(p, header->flags);
p += net_pack_u32(p, header->offset_full);
p += net_pack_u32(p, header->data_length_full);
p += net_pack_u32(p, header->received_length_full);
for (size_t i = 0; i < RTP_PADDING_FIELDS; ++i) {
p += net_pack_u32(p, 0);
}
p += net_pack_u16(p, header->offset_lower);
p += net_pack_u16(p, header->data_length_lower);
assert(p == rdata + RTP_HEADER_SIZE);
return p - rdata;
}
size_t rtp_header_unpack(const uint8_t *data, struct RTPHeader *header)
{
const uint8_t *p = data;
header->ve = (*p >> 6) & 3;
header->pe = (*p >> 5) & 1;
header->xe = (*p >> 4) & 1;
header->cc = *p & 0xf;
++p;
header->ma = (*p >> 7) & 1;
header->pt = *p & 0x7f;
++p;
p += net_unpack_u16(p, &header->sequnum);
p += net_unpack_u32(p, &header->timestamp);
p += net_unpack_u32(p, &header->ssrc);
p += net_unpack_u64(p, &header->flags);
p += net_unpack_u32(p, &header->offset_full);
p += net_unpack_u32(p, &header->data_length_full);
p += net_unpack_u32(p, &header->received_length_full);
p += sizeof(uint32_t) * RTP_PADDING_FIELDS;
p += net_unpack_u16(p, &header->offset_lower);
p += net_unpack_u16(p, &header->data_length_lower);
assert(p == data + RTP_HEADER_SIZE);
return p - data;
}
static uint32_t rtp_random_u32(void)
{
// HINT: uses libsodium function
return randombytes_random();
}
RTPSession *rtp_new(const Logger *log, int payload_type, Mono_Time *mono_time,
rtp_send_packet_cb *send_packet, void *send_packet_user_data,
rtp_add_recv_cb *add_recv, rtp_add_lost_cb *add_lost, void *bwc_user_data,
void *cs, rtp_m_cb *mcb)
{
assert(mcb != nullptr);
assert(cs != nullptr);
RTPSession *session = (RTPSession *)calloc(1, sizeof(RTPSession));
if (session == nullptr) {
LOGGER_WARNING(log, "Alloc failed! Program might misbehave!");
return nullptr;
}
session->work_buffer_list = (struct RTPWorkBufferList *)calloc(1, sizeof(struct RTPWorkBufferList));
if (session->work_buffer_list == nullptr) {
LOGGER_ERROR(log, "out of memory while allocating work buffer list");
free(session);
return nullptr;
}
// First entry is free.
session->work_buffer_list->next_free_entry = 0;
session->ssrc = payload_type == RTP_TYPE_VIDEO ? 0 : rtp_random_u32(); // Zoff: what is this??
session->payload_type = payload_type;
session->log = log;
session->mono_time = mono_time;
session->rtp_receive_active = true;
session->send_packet = send_packet;
session->send_packet_user_data = send_packet_user_data;
session->add_recv = add_recv;
session->add_lost = add_lost;
session->bwc_user_data = bwc_user_data;
// set NULL just in case
session->mp = nullptr;
session->first_packets_counter = 1;
/* Also set payload type as prefix */
session->cs = cs;
session->mcb = mcb;
return session;
}
void rtp_kill(const Logger *log, RTPSession *session)
{
if (session == nullptr) {
LOGGER_WARNING(log, "No session");
return;
}
LOGGER_DEBUG(log, "Terminated RTP session: %p", (void *)session);
LOGGER_DEBUG(log, "Terminated RTP session V3 work_buffer_list->next_free_entry: %d",
(int)session->work_buffer_list->next_free_entry);
if (session->work_buffer_list) {
for (int8_t i = 0; i < session->work_buffer_list->next_free_entry; ++i) {
free(session->work_buffer_list->work_buffer[i].buf);
}
free(session->work_buffer_list);
}
free(session->mp);
free(session);
}
void rtp_allow_receiving_mark(RTPSession *session)
{
if (session != nullptr) {
session->rtp_receive_active = true;
}
}
void rtp_stop_receiving_mark(RTPSession *session)
{
if (session != nullptr) {
session->rtp_receive_active = false;
}
}
static void rtp_send_piece(RTPSession *session, const struct RTPHeader *header,
const uint8_t *data, uint8_t *rdata, uint16_t length)
{
rtp_header_pack(rdata + 1, header);
memcpy(rdata + 1 + RTP_HEADER_SIZE, data, length);
const uint16_t rdata_size = length + RTP_HEADER_SIZE + 1;
if (session->send_packet) {
session->send_packet(session->send_packet_user_data, rdata, rdata_size);
}
}
static struct RTPHeader rtp_default_header(const RTPSession *session, uint32_t length, bool is_keyframe)
{
uint16_t length_safe = (uint16_t)length;
if (length > UINT16_MAX) {
length_safe = UINT16_MAX;
}
struct RTPHeader header = {0};
if (is_keyframe) {
header.flags |= RTP_KEY_FRAME;
}
if (session->payload_type == RTP_TYPE_VIDEO) {
header.flags |= RTP_LARGE_FRAME;
}
header.ve = 2; // this is unused in toxav
header.pe = 0;
header.xe = 0;
header.cc = 0;
header.ma = 0;
header.pt = session->payload_type % 128;
header.sequnum = session->sequnum;
if (session->mono_time != nullptr) {
header.timestamp = current_time_monotonic(session->mono_time);
} else {
header.timestamp = 0;
}
header.ssrc = session->ssrc;
header.offset_lower = 0;
header.data_length_lower = length_safe;
header.data_length_full = length; // without header
header.offset_full = 0;
return header;
}
/**
* @brief Send a frame of audio or video data, chunked in @ref RTPMessage instances.
*
* @param session The A/V session to send the data for.
* @param data A byte array of length @p length.
* @param length The number of bytes to send from @p data.
* @param is_keyframe Whether this video frame is a key frame. If it is an
* audio frame, this parameter is ignored.
*/
int rtp_send_data(const Logger *log, RTPSession *session, const uint8_t *data, uint32_t length,
bool is_keyframe)
{
if (session == nullptr) {
return -1;
}
const uint16_t rdata_size = min_u32(length + RTP_HEADER_SIZE + 1, MAX_CRYPTO_DATA_SIZE);
VLA(uint8_t, rdata, rdata_size);
memset(rdata, 0, rdata_size);
rdata[0] = session->payload_type; // packet id == payload_type
struct RTPHeader header = rtp_default_header(session, length, is_keyframe);
if (MAX_CRYPTO_DATA_SIZE > (length + RTP_HEADER_SIZE + 1)) {
/*
* The length is lesser than the maximum allowed length (including header)
* Send the packet in single piece.
*/
assert(length < UINT16_MAX);
rtp_send_piece(session, &header, data, rdata, (uint16_t)length);
} else {
/*
* The length is greater than the maximum allowed length (including header)
* Send the packet in multiple pieces.
*/
uint32_t sent = 0;
uint16_t piece = MAX_CRYPTO_DATA_SIZE - (RTP_HEADER_SIZE + 1);
while ((length - sent) + RTP_HEADER_SIZE + 1 > MAX_CRYPTO_DATA_SIZE) {
rtp_send_piece(session, &header, data + sent, rdata, piece);
sent += piece;
header.offset_lower = (uint16_t)sent;
header.offset_full = sent; // raw data offset, without any header
}
/* Send remaining */
piece = (uint16_t)(length - sent);
if (piece != 0) {
rtp_send_piece(session, &header, data + sent, rdata, piece);
}
}
++session->sequnum;
return 0;
}