/* SPDX-License-Identifier: GPL-3.0-or-later
 * Copyright © 2016-2025 The TokTok team.
 * Copyright © 2013-2015 Tox project.
 */
#include "rtp.h"

#include <assert.h>
#include <sodium.h>
#include <stdlib.h>
#include <string.h>

#include "../toxcore/ccompat.h"
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/net_crypto.h"
#include "../toxcore/network.h"
#include "../toxcore/util.h"

/**
 * Maximum size of a single RTP frame in bytes.
 *
 * This limit prevents memory exhaustion attacks where a malicious peer sends
 * a header indicating a very large frame size, causing the receiver to allocate
 * excessive memory.
 */
#define MAX_RTP_FRAME_SIZE (32 * 1024 * 1024)

/**
 * In-memory form of the RTP header. The wire format is produced and parsed
 * field by field by rtp_header_pack() and rtp_header_unpack(); the memory
 * layout of this struct is never sent as-is.
 */
struct RTPHeader {
    /* Standard RTP header */
    unsigned ve: 2; /* Version has only 2 bits! */
    unsigned pe: 1; /* Padding */
    unsigned xe: 1; /* Extra header */
    unsigned cc: 4; /* Contributing sources count */

    unsigned ma: 1; /* Marker */
    unsigned pt: 7; /* Payload type */

    uint16_t sequnum;
    uint32_t timestamp;
    uint32_t ssrc;

    /* Non-standard Tox-specific fields */

    /**
     * Bit mask of `RTPFlags` setting features of the current frame.
     */
    uint64_t flags;

    /**
     * The full 32 bit data offset of the current data chunk. The
     * @ref offset_lower data member contains the lower 16 bits of this value.
     * For frames smaller than 64KiB, @ref offset_full and @ref offset_lower are
     * equal.
     */
    uint32_t offset_full;
    /**
     * The full 32 bit payload length without header and packet id.
     */
    uint32_t data_length_full;
    /**
     * Number of payload bytes received so far for this frame. The receiver
     * writes it while assembling a frame (see fill_data_into_slot) and reads
     * it for the bandwidth statistics (see update_bwc_values). It is still
     * part of the packed header; the sender leaves it at 0.
     */
    uint32_t received_length_full;

    /**
     * Data offset of the current part (lower bits).
     */
    uint16_t offset_lower;
    /**
     * Total message length (lower bits).
     */
    uint16_t data_length_lower;
};

struct RTPMessage {
    /**
     * This is used in the old code that doesn't deal with large frames, i.e.
     * the audio code or receiving code for old 16 bit messages. We use it to
     * record the number of bytes received so far in a multi-part message. The
     * multi-part message in the old code is stored in `RTPSession::mp`.
     */
    uint32_t len;

    struct RTPHeader header;
    uint8_t data[];
};

/**
 * One slot in the work buffer list. Represents one frame that is currently
 * being assembled.
 */
struct RTPWorkBuffer {
    /**
     * Whether this slot contains a key frame. This is true iff
     * `buf->header.flags & RTP_KEY_FRAME`.
     */
    bool is_keyframe;
    /**
     * The number of bytes received so far, regardless of which pieces. I.e. we
     * could have received the first 1000 bytes and the last 1000 bytes with
     * 4000 bytes in the middle still to come, and this number would be 2000.
     */
    uint32_t received_len;
    /**
     * The message currently being assembled. Null for an unused slot.
     */
    struct RTPMessage *buf;
};

/**
 * The frames currently being assembled. Slots `0 .. next_free_entry - 1` are
 * in use, ordered from oldest to newest; the remaining slots are zeroed.
 */
struct RTPWorkBufferList {
    int8_t next_free_entry;
    struct RTPWorkBuffer work_buffer[USED_RTP_WORKBUFFER_COUNT];
};

/**
 * RTP control session.
 */
struct RTPSession {
    uint8_t payload_type;
    uint16_t sequnum;      /* Sending sequence number */
    uint16_t rsequnum;     /* Receiving sequence number */
    uint32_t rtimestamp;
    uint32_t ssrc;         /* Copied into the header of every outgoing packet */
    struct RTPMessage *mp; /* Expected parted message */
    struct RTPWorkBufferList *work_buffer_list;
    uint8_t first_packets_counter; /* dismiss first few lost video packets */
    const Logger *log;
    Mono_Time *mono_time;
    /* Receive gate exposed through rtp_session_is_receiving_active().
     * NOTE(review): rtp_receive_packet() in this file does not test the flag
     * itself, so the caller is presumably expected to check it first. */
    bool rtp_receive_active;

    rtp_send_packet_cb *send_packet;
    void *send_packet_user_data;

    rtp_add_recv_cb *add_recv;
    rtp_add_lost_cb *add_lost;
    void *bwc_user_data;

    void *cs;
    rtp_m_cb *mcb;
};

/** @return pointer to the payload bytes of the message (no header). */
const uint8_t *rtp_message_data(const RTPMessage *msg)
{
    return msg->data;
}

/** @return the `len` field of the message. */
uint32_t rtp_message_len(const RTPMessage *msg)
{
    return msg->len;
}

/** @return the 7 bit payload type stored in the message header. */
uint8_t rtp_message_pt(const RTPMessage *msg)
{
    return msg->header.pt;
}

/** @return the sequence number stored in the message header. */
uint16_t rtp_message_sequnum(const RTPMessage *msg)
{
    return msg->header.sequnum;
}

/** @return the `RTPFlags` bit mask stored in the message header. */
uint64_t rtp_message_flags(const RTPMessage *msg)
{
    return msg->header.flags;
}

/** @return the full 32 bit frame length stored in the message header. */
uint32_t rtp_message_data_length_full(const RTPMessage *msg)
{
    return msg->header.data_length_full;
}

/** @return the current value of the session's receive flag. */
bool rtp_session_is_receiving_active(const RTPSession *session)
{
    return session->rtp_receive_active;
}

/** @return the ssrc value that is placed into outgoing headers. */
uint32_t rtp_session_get_ssrc(const RTPSession *session)
{
    return session->ssrc;
}

/** Set the ssrc value that is placed into outgoing headers. */
void rtp_session_set_ssrc(RTPSession *session, uint32_t ssrc)
{
    session->ssrc = ssrc;
}

/**
 * The number of milliseconds we want to keep a keyframe in the buffer for,
 * even though there are no free slots for incoming frames.
 */
#define VIDEO_KEEP_KEYFRAME_IN_BUFFER_FOR_MS 15

// allocate_len is NOT including header!
static struct RTPMessage *new_message(const Logger *log, const struct RTPHeader *header, size_t allocate_len, const uint8_t *data, uint16_t data_length) { if (allocate_len < data_length) { LOGGER_WARNING(log, "new_message: allocate_len (%zu) < data_length (%u)", allocate_len, data_length); return nullptr; } struct RTPMessage *msg = (struct RTPMessage *)calloc(1, sizeof(struct RTPMessage) + allocate_len); if (msg == nullptr) { LOGGER_WARNING(log, "Could not allocate RTPMessage buffer"); return nullptr; } msg->len = data_length; // result without header msg->header = *header; memcpy(msg->data, data, msg->len); return msg; } /** * Instruct the caller to clear slot 0. */ #define GET_SLOT_RESULT_DROP_OLDEST_SLOT (-1) /** * Instruct the caller to drop the incoming packet. */ #define GET_SLOT_RESULT_DROP_INCOMING (-2) /** * Find the next free slot in work_buffer for the incoming data packet. * * - If the data packet belongs to a frame that's already in the work_buffer then * use that slot. * - If there is no free slot return GET_SLOT_RESULT_DROP_OLDEST_SLOT. * - If the data packet is too old return GET_SLOT_RESULT_DROP_INCOMING. * * If there is a keyframe being assembled in slot 0, keep it a bit longer and * do not kick it out right away if all slots are full instead kick out the new * incoming interframe. */ static int8_t get_slot(const Logger *log, struct RTPWorkBufferList *wkbl, bool is_keyframe, const struct RTPHeader *header, bool is_multipart) { if (is_multipart) { // This RTP message is part of a multipart frame, so we try to find an // existing slot with the previous parts of the frame in it. for (uint8_t i = 0; i < wkbl->next_free_entry; ++i) { const struct RTPWorkBuffer *slot = &wkbl->work_buffer[i]; if ((slot->buf->header.sequnum == header->sequnum) && (slot->buf->header.timestamp == header->timestamp)) { // Sequence number and timestamp match, so this slot belongs to // the same frame. 
// // In reality, these will almost certainly either both match or // both not match. Only if somehow there were 65535 frames // between, the timestamp will matter. return i; } } } // The message may or may not be part of a multipart frame. // // If it is part of a multipart frame, then this is an entirely new frame // for which we did not have a slot *or* the frame is so old that its slot // has been evicted by now. // // |----------- time -----------> // _________________ // slot 0 | | // ----------------- // _________________ // slot 1 | | // ----------------- // ____________ // slot 2 | | -> frame too old, drop // ------------ // // // // |----------- time -----------> // _________________ // slot 0 | | // ----------------- // _________________ // slot 1 | | // ----------------- // ____________ // slot 2 | | -> ok, start filling in a new slot // ------------ // If there is a free slot: if (wkbl->next_free_entry < USED_RTP_WORKBUFFER_COUNT) { // If there is at least one filled slot: if (wkbl->next_free_entry > 0) { // Get the most recently filled slot. const struct RTPWorkBuffer *slot = &wkbl->work_buffer[wkbl->next_free_entry - 1]; // If the incoming packet is older than our newest slot, drop it. // This is the first situation in the above diagram. if (slot->buf->header.timestamp > header->timestamp) { LOGGER_DEBUG(log, "workbuffer:2:timestamp too old"); return GET_SLOT_RESULT_DROP_INCOMING; } } // Not all slots are filled, and the packet is newer than our most // recent slot, so it's a new frame we want to start assembling. This is // the second situation in the above diagram. return wkbl->next_free_entry; } // If the incoming frame is a key frame, then stop assembling the oldest // slot, regardless of whether there was a keyframe in that or not. if (is_keyframe) { return GET_SLOT_RESULT_DROP_OLDEST_SLOT; } // The incoming slot is not a key frame, so we look at slot 0 to see what to // do next. 
const struct RTPWorkBuffer *slot = &wkbl->work_buffer[0]; // The incoming frame is not a key frame, but the existing slot 0 is also // not a keyframe, so we stop assembling the existing frame and make space // for the new one. if (!slot->is_keyframe) { return GET_SLOT_RESULT_DROP_OLDEST_SLOT; } // If this key frame is fully received, we also stop assembling and clear // slot 0. This also means sending the frame to the decoder. if (slot->received_len == slot->buf->header.data_length_full) { return GET_SLOT_RESULT_DROP_OLDEST_SLOT; } // This is a key frame, not fully received yet, but it's already much older // than the incoming frame, so we stop assembling it and send whatever part // we did receive to the decoder. if (slot->buf->header.timestamp + VIDEO_KEEP_KEYFRAME_IN_BUFFER_FOR_MS <= header->timestamp) { return GET_SLOT_RESULT_DROP_OLDEST_SLOT; } // This is a key frame, it's not too old yet, so we keep it in its slot for // a little longer. LOGGER_INFO(log, "keep KEYFRAME in workbuffer"); return GET_SLOT_RESULT_DROP_INCOMING; } /** * Returns an assembled frame (as much data as we currently have for this frame, * some pieces may be missing) * * If there are no frames ready, we return NULL. If this function returns * non-NULL, it transfers ownership of the message to the caller, i.e. the * caller is responsible for storing it elsewhere or calling `free()`. */ static struct RTPMessage *process_frame(const Logger *log, struct RTPWorkBufferList *wkbl, uint8_t slot_id) { assert(wkbl->next_free_entry >= 0); if (wkbl->next_free_entry == 0) { // There are no frames in any slot. return nullptr; } // Slot 0 contains a key frame, slot_id points at an interframe that is // relative to that key frame, so we don't use it yet. if (wkbl->work_buffer[0].is_keyframe && slot_id != 0) { LOGGER_DEBUG(log, "process_frame:KEYFRAME waiting in slot 0"); return nullptr; } // Either slot_id is 0 and slot 0 is a key frame, or there is no key frame // in slot 0 (and slot_id is anything). 
struct RTPWorkBuffer *const slot = &wkbl->work_buffer[slot_id]; // Move ownership of the frame out of the slot into m_new. struct RTPMessage *msg = slot->buf; msg->len = msg->header.data_length_full; slot->buf = nullptr; assert(wkbl->next_free_entry >= 1 && wkbl->next_free_entry <= USED_RTP_WORKBUFFER_COUNT); if (slot_id != wkbl->next_free_entry - 1) { // The slot is not the last slot, so we created a gap. We move all the // entries after it one step up. for (uint8_t i = slot_id; i < wkbl->next_free_entry - 1; ++i) { // Move entry (i+1) into entry (i). wkbl->work_buffer[i] = wkbl->work_buffer[i + 1]; } } // We now have a free entry at the end of the array. --wkbl->next_free_entry; // Clear the newly freed entry. const struct RTPWorkBuffer empty = {0}; wkbl->work_buffer[wkbl->next_free_entry] = empty; // Move ownership of the frame to the caller. return msg; } /** * @param log A pointer to the Logger object. * @param wkbl The list of in-progress frames, i.e. all the slots. * @param slot_id The slot we want to fill the data into. * @param is_keyframe Whether the data is part of a key frame. * @param header The RTP header from the incoming packet. * @param incoming_data The pure payload without header. * @param incoming_data_length The length in bytes of the incoming data payload. */ static bool fill_data_into_slot(const Logger *log, struct RTPWorkBufferList *wkbl, const uint8_t slot_id, bool is_keyframe, const struct RTPHeader *header, const uint8_t *incoming_data, uint16_t incoming_data_length) { // We're either filling the data into an existing slot, or in a new one that // is the next free entry. 
assert(slot_id <= wkbl->next_free_entry); struct RTPWorkBuffer *const slot = &wkbl->work_buffer[slot_id]; assert(header != nullptr); assert(is_keyframe == (bool)((header->flags & RTP_KEY_FRAME) != 0)); if (slot->received_len == 0) { assert(slot->buf == nullptr); if (header->data_length_full > MAX_RTP_FRAME_SIZE) { LOGGER_WARNING(log, "RTP frame too large: %u > %u", (unsigned)header->data_length_full, (unsigned)MAX_RTP_FRAME_SIZE); return false; } // No data for this slot has been received, yet, so we create a new // message for it with enough memory for the entire frame. struct RTPMessage *msg = (struct RTPMessage *)calloc(1, sizeof(struct RTPMessage) + header->data_length_full); if (msg == nullptr) { LOGGER_ERROR(log, "Out of memory while trying to allocate for frame of size %u", (unsigned)header->data_length_full); // Out of memory: throw away the incoming data. return false; } // Unused in the new video receiving code, as it's 16 bit and can't hold // the full length of large frames. Instead, we use slot->received_len. msg->len = 0; msg->header = *header; slot->buf = msg; slot->is_keyframe = is_keyframe; slot->received_len = 0; assert(wkbl->next_free_entry < USED_RTP_WORKBUFFER_COUNT); ++wkbl->next_free_entry; } else { if (slot->buf->header.data_length_full != header->data_length_full) { LOGGER_WARNING(log, "Received packet with different length than previous packets in same frame: %u != %u", header->data_length_full, slot->buf->header.data_length_full); return false; } } // We already checked this when we received the packet, but we rely on it // here, so assert again. 
assert(header->offset_full < header->data_length_full); if (header->data_length_full - header->offset_full < incoming_data_length) { LOGGER_ERROR(log, "Packet too long for buffer: offset %u + len %u > total %u", (unsigned)header->offset_full, (unsigned)incoming_data_length, (unsigned)header->data_length_full); return false; } // Copy the incoming chunk of data into the correct position in the full // frame data array. memcpy( slot->buf->data + header->offset_full, incoming_data, incoming_data_length ); // Update the total received length of this slot. slot->received_len += incoming_data_length; // Update received length also in the header of the message, for later use. slot->buf->header.received_length_full = slot->received_len; return slot->received_len == header->data_length_full; } static void update_bwc_values(RTPSession *session, const struct RTPMessage *msg) { if (session->first_packets_counter < DISMISS_FIRST_LOST_VIDEO_PACKET_COUNT) { ++session->first_packets_counter; } else { const uint32_t data_length_full = msg->header.data_length_full; // without header const uint32_t received_length_full = msg->header.received_length_full; // without header if (session->add_recv) { session->add_recv(session->bwc_user_data, data_length_full); } if (received_length_full < data_length_full) { LOGGER_DEBUG(session->log, "BWC: full length=%u received length=%u", data_length_full, received_length_full); if (session->add_lost) { session->add_lost(session->bwc_user_data, data_length_full - received_length_full); } } } } /** * Handle a single RTP video packet. * * The packet may or may not be part of a multipart frame. This function will * find out and handle it appropriately. * * @param session The current RTP session * @param header The RTP header deserialised from the packet. * @param incoming_data The packet data *not* header, i.e. this is the actual * payload. * @param incoming_data_length The packet length *not* including header, i.e. * this is the actual payload length. 
* @param log A logger. * * @retval -1 on error. * @retval 0 on success. */ static int handle_video_packet(const Logger *log, RTPSession *session, const struct RTPHeader *header, const uint8_t *incoming_data, uint16_t incoming_data_length) { // Full frame length in bytes. The frame may be split into multiple packets, // but this value is the complete assembled frame size. const uint32_t full_frame_length = header->data_length_full; // The sender tells us whether this is a key frame. const bool is_keyframe = (header->flags & RTP_KEY_FRAME) != 0; LOGGER_DEBUG(log, "wkbl->next_free_entry:003=%d", session->work_buffer_list->next_free_entry); const bool is_multipart = full_frame_length != incoming_data_length; /* The message was sent in single part */ int8_t slot_id = get_slot(log, session->work_buffer_list, is_keyframe, header, is_multipart); LOGGER_DEBUG(log, "slot num=%d", slot_id); // get_slot told us to drop the packet, so we ignore it. if (slot_id == GET_SLOT_RESULT_DROP_INCOMING) { return -1; } // get_slot said there is no free slot. if (slot_id == GET_SLOT_RESULT_DROP_OLDEST_SLOT) { LOGGER_DEBUG(log, "there was no free slot, so we process the oldest frame"); // We now own the frame. struct RTPMessage *m_new = process_frame(log, session->work_buffer_list, 0); // The process_frame function returns NULL if there is no slot 0, i.e. // the work buffer list is completely empty. It can't be empty, because // get_slot just told us it's full, so process_frame must return non-null. assert(m_new != nullptr); if (m_new->len >= 2) { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a b0=%d b1=%d", (int)m_new->data[0], (int)m_new->data[1]); } else if (m_new->len == 1) { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a b0=%d", (int)m_new->data[0]); } else { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-001a (empty)"); } update_bwc_values(session, m_new); // Pass ownership of m_new to the callback. 
session->mcb(session->mono_time, session->cs, m_new); // Now we no longer own m_new. m_new = nullptr; // Now we must have a free slot, so we either get that slot, i.e. >= 0, // or get told to drop the incoming packet if it's too old. slot_id = get_slot(log, session->work_buffer_list, is_keyframe, header, /* is_multipart */false); if (slot_id == GET_SLOT_RESULT_DROP_INCOMING) { // The incoming frame is too old, so we drop it. return -1; } } // We must have a valid slot here. assert(slot_id >= 0); LOGGER_DEBUG(log, "fill_data_into_slot.1"); // fill in this part into the slot buffer at the correct offset if (!fill_data_into_slot( log, session->work_buffer_list, slot_id, is_keyframe, header, incoming_data, incoming_data_length)) { // Memory allocation failed. Return error. return -1; } struct RTPMessage *m_new = process_frame(log, session->work_buffer_list, slot_id); if (m_new != nullptr) { if (m_new->len >= 2) { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a b0=%d b1=%d", (int)m_new->data[0], (int)m_new->data[1]); } else if (m_new->len == 1) { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a b0=%d", (int)m_new->data[0]); } else { LOGGER_DEBUG(log, "-- handle_video_packet -- CALLBACK-003a (empty)"); } update_bwc_values(session, m_new); session->mcb(session->mono_time, session->cs, m_new); m_new = nullptr; } return 0; } /** * receive custom lossypackets and process them. they can be incoming audio or video packets */ void rtp_receive_packet(RTPSession *session, const uint8_t *data, size_t length) { const Logger *log = session->log; if (length < RTP_HEADER_SIZE + 1) { LOGGER_WARNING(log, "Invalid length of received buffer!"); return; } // Get the packet type. const uint8_t packet_type = data[0]; const uint8_t *payload = &data[1]; // TODO(Zoff): is this ok? const uint16_t payload_size = (uint16_t)length - 1; // Unpack the header. 
struct RTPHeader header; rtp_header_unpack(payload, &header); if (header.pt != packet_type % 128) { LOGGER_WARNING(log, "RTPHeader packet type and Tox protocol packet type did not agree: %d != %d", header.pt, packet_type % 128); return; } if (header.pt != session->payload_type % 128) { LOGGER_WARNING(log, "RTPHeader packet type does not match this session's payload type: %d != %d", header.pt, session->payload_type % 128); return; } if ((header.flags & RTP_LARGE_FRAME) != 0 && header.offset_full >= header.data_length_full) { LOGGER_ERROR(log, "Invalid video packet: frame offset (%u) >= full frame length (%u)", (unsigned)header.offset_full, (unsigned)header.data_length_full); return; } if (header.offset_lower >= header.data_length_lower) { LOGGER_ERROR(log, "Invalid old protocol video packet: frame offset (%u) >= full frame length (%u)", (unsigned)header.offset_lower, (unsigned)header.data_length_lower); return; } LOGGER_DEBUG(log, "header.pt %d, video %d", (uint8_t)header.pt, RTP_TYPE_VIDEO % 128); // The sender uses the new large-frame capable protocol and is sending a // video packet. 
if ((header.flags & RTP_LARGE_FRAME) != 0 && header.pt == (RTP_TYPE_VIDEO % 128)) { handle_video_packet(log, session, &header, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE); return; } // everything below here is for the old 16 bit protocol ------------------ if (header.data_length_lower == payload_size - RTP_HEADER_SIZE) { /* The message is sent in single part */ /* Message is not late; pick up the latest parameters */ session->rsequnum = header.sequnum; session->rtimestamp = header.timestamp; if (session->add_recv) { session->add_recv(session->bwc_user_data, payload_size); } /* Invoke processing of active multiparted message */ if (session->mp != nullptr) { session->mcb(session->mono_time, session->cs, session->mp); session->mp = nullptr; } /* The message came in the allowed time; */ session->mp = new_message(log, &header, payload_size - RTP_HEADER_SIZE, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE); session->mcb(session->mono_time, session->cs, session->mp); session->mp = nullptr; return; } /* The message is sent in multiple parts */ if (session->mp != nullptr) { /* There are 2 possible situations in this case: * 1) being that we got the part of already processing message. * 2) being that we got the part of a new/old message. 
* * We handle them differently as we only allow a single multiparted * processing message */ if (session->mp->header.sequnum == header.sequnum && session->mp->header.timestamp == header.timestamp) { /* First case */ /* Make sure we have enough allocated memory */ if (session->mp->header.data_length_lower - session->mp->len < payload_size - RTP_HEADER_SIZE || session->mp->header.data_length_lower <= header.offset_lower || session->mp->header.data_length_lower - header.offset_lower < payload_size - RTP_HEADER_SIZE) { LOGGER_WARNING(log, "Corruption on the stream: multipart audio packet does not fit"); return; } memcpy(session->mp->data + header.offset_lower, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE); session->mp->len += payload_size - RTP_HEADER_SIZE; if (session->add_recv) { session->add_recv(session->bwc_user_data, payload_size); } if (session->mp->len == session->mp->header.data_length_lower) { /* Received a full message; now push it for the further * processing. */ session->mcb(session->mono_time, session->cs, session->mp); session->mp = nullptr; } } else { /* Second case */ if (session->mp->header.timestamp > header.timestamp) { /* The received message part is from the old message; * discard it. 
*/ return; } /* Push the previous message for processing */ session->mcb(session->mono_time, session->cs, session->mp); session->mp = nullptr; goto NEW_MULTIPARTED; } } else { /* In this case treat the message as if it was received in order */ /* This is also a point for new multiparted messages */ NEW_MULTIPARTED: if (header.data_length_lower - header.offset_lower < payload_size - RTP_HEADER_SIZE) { LOGGER_WARNING(log, "Packet too long for buffer: offset %u + len %u > total %u", (unsigned)header.offset_lower, (unsigned)(payload_size - RTP_HEADER_SIZE), (unsigned)header.data_length_lower); return; } /* Message is not late; pick up the latest parameters */ session->rsequnum = header.sequnum; session->rtimestamp = header.timestamp; if (session->add_recv) { session->add_recv(session->bwc_user_data, payload_size); } /* Store message. */ session->mp = new_message(log, &header, header.data_length_lower, &payload[RTP_HEADER_SIZE], payload_size - RTP_HEADER_SIZE); if (session->mp != nullptr) { memmove(session->mp->data + header.offset_lower, session->mp->data, session->mp->len); } else { LOGGER_WARNING(log, "new_message() returned a null pointer"); return; } } return; } size_t rtp_header_pack(uint8_t *const rdata, const struct RTPHeader *header) { uint8_t *p = rdata; *p = (header->ve & 3) << 6 | (header->pe & 1) << 5 | (header->xe & 1) << 4 | (header->cc & 0xf); ++p; *p = (header->ma & 1) << 7 | (header->pt & 0x7f); ++p; p += net_pack_u16(p, header->sequnum); p += net_pack_u32(p, header->timestamp); p += net_pack_u32(p, header->ssrc); p += net_pack_u64(p, header->flags); p += net_pack_u32(p, header->offset_full); p += net_pack_u32(p, header->data_length_full); p += net_pack_u32(p, header->received_length_full); for (size_t i = 0; i < RTP_PADDING_FIELDS; ++i) { p += net_pack_u32(p, 0); } p += net_pack_u16(p, header->offset_lower); p += net_pack_u16(p, header->data_length_lower); assert(p == rdata + RTP_HEADER_SIZE); return p - rdata; } size_t rtp_header_unpack(const uint8_t 
*data, struct RTPHeader *header) { const uint8_t *p = data; header->ve = (*p >> 6) & 3; header->pe = (*p >> 5) & 1; header->xe = (*p >> 4) & 1; header->cc = *p & 0xf; ++p; header->ma = (*p >> 7) & 1; header->pt = *p & 0x7f; ++p; p += net_unpack_u16(p, &header->sequnum); p += net_unpack_u32(p, &header->timestamp); p += net_unpack_u32(p, &header->ssrc); p += net_unpack_u64(p, &header->flags); p += net_unpack_u32(p, &header->offset_full); p += net_unpack_u32(p, &header->data_length_full); p += net_unpack_u32(p, &header->received_length_full); p += sizeof(uint32_t) * RTP_PADDING_FIELDS; p += net_unpack_u16(p, &header->offset_lower); p += net_unpack_u16(p, &header->data_length_lower); assert(p == data + RTP_HEADER_SIZE); return p - data; } static uint32_t rtp_random_u32(void) { // HINT: uses libsodium function return randombytes_random(); } RTPSession *rtp_new(const Logger *log, int payload_type, Mono_Time *mono_time, rtp_send_packet_cb *send_packet, void *send_packet_user_data, rtp_add_recv_cb *add_recv, rtp_add_lost_cb *add_lost, void *bwc_user_data, void *cs, rtp_m_cb *mcb) { assert(mcb != nullptr); assert(cs != nullptr); RTPSession *session = (RTPSession *)calloc(1, sizeof(RTPSession)); if (session == nullptr) { LOGGER_WARNING(log, "Alloc failed! Program might misbehave!"); return nullptr; } session->work_buffer_list = (struct RTPWorkBufferList *)calloc(1, sizeof(struct RTPWorkBufferList)); if (session->work_buffer_list == nullptr) { LOGGER_ERROR(log, "out of memory while allocating work buffer list"); free(session); return nullptr; } // First entry is free. session->work_buffer_list->next_free_entry = 0; session->ssrc = payload_type == RTP_TYPE_VIDEO ? 0 : rtp_random_u32(); // Zoff: what is this?? 
session->payload_type = payload_type; session->log = log; session->mono_time = mono_time; session->rtp_receive_active = true; session->send_packet = send_packet; session->send_packet_user_data = send_packet_user_data; session->add_recv = add_recv; session->add_lost = add_lost; session->bwc_user_data = bwc_user_data; // set NULL just in case session->mp = nullptr; session->first_packets_counter = 1; /* Also set payload type as prefix */ session->cs = cs; session->mcb = mcb; return session; } void rtp_kill(const Logger *log, RTPSession *session) { if (session == nullptr) { LOGGER_WARNING(log, "No session"); return; } LOGGER_DEBUG(log, "Terminated RTP session: %p", (void *)session); LOGGER_DEBUG(log, "Terminated RTP session V3 work_buffer_list->next_free_entry: %d", (int)session->work_buffer_list->next_free_entry); if (session->work_buffer_list) { for (int8_t i = 0; i < session->work_buffer_list->next_free_entry; ++i) { free(session->work_buffer_list->work_buffer[i].buf); } free(session->work_buffer_list); } free(session->mp); free(session); } void rtp_allow_receiving_mark(RTPSession *session) { if (session != nullptr) { session->rtp_receive_active = true; } } void rtp_stop_receiving_mark(RTPSession *session) { if (session != nullptr) { session->rtp_receive_active = false; } } static void rtp_send_piece(RTPSession *session, const struct RTPHeader *header, const uint8_t *data, uint8_t *rdata, uint16_t length) { rtp_header_pack(rdata + 1, header); memcpy(rdata + 1 + RTP_HEADER_SIZE, data, length); const uint16_t rdata_size = length + RTP_HEADER_SIZE + 1; if (session->send_packet) { session->send_packet(session->send_packet_user_data, rdata, rdata_size); } } static struct RTPHeader rtp_default_header(const RTPSession *session, uint32_t length, bool is_keyframe) { uint16_t length_safe = (uint16_t)length; if (length > UINT16_MAX) { length_safe = UINT16_MAX; } struct RTPHeader header = {0}; if (is_keyframe) { header.flags |= RTP_KEY_FRAME; } if (session->payload_type == 
RTP_TYPE_VIDEO) { header.flags |= RTP_LARGE_FRAME; } header.ve = 2; // this is unused in toxav header.pe = 0; header.xe = 0; header.cc = 0; header.ma = 0; header.pt = session->payload_type % 128; header.sequnum = session->sequnum; if (session->mono_time != nullptr) { header.timestamp = current_time_monotonic(session->mono_time); } else { header.timestamp = 0; } header.ssrc = session->ssrc; header.offset_lower = 0; header.data_length_lower = length_safe; header.data_length_full = length; // without header header.offset_full = 0; return header; } /** * @brief Send a frame of audio or video data, chunked in @ref RTPMessage instances. * * @param session The A/V session to send the data for. * @param data A byte array of length @p length. * @param length The number of bytes to send from @p data. * @param is_keyframe Whether this video frame is a key frame. If it is an * audio frame, this parameter is ignored. */ int rtp_send_data(const Logger *log, RTPSession *session, const uint8_t *data, uint32_t length, bool is_keyframe) { if (session == nullptr) { return -1; } const uint16_t rdata_size = min_u32(length + RTP_HEADER_SIZE + 1, MAX_CRYPTO_DATA_SIZE); VLA(uint8_t, rdata, rdata_size); memset(rdata, 0, rdata_size); rdata[0] = session->payload_type; // packet id == payload_type struct RTPHeader header = rtp_default_header(session, length, is_keyframe); if (MAX_CRYPTO_DATA_SIZE > (length + RTP_HEADER_SIZE + 1)) { /* * The length is lesser than the maximum allowed length (including header) * Send the packet in single piece. */ assert(length < UINT16_MAX); rtp_send_piece(session, &header, data, rdata, (uint16_t)length); } else { /* * The length is greater than the maximum allowed length (including header) * Send the packet in multiple pieces. 
*/ uint32_t sent = 0; uint16_t piece = MAX_CRYPTO_DATA_SIZE - (RTP_HEADER_SIZE + 1); while ((length - sent) + RTP_HEADER_SIZE + 1 > MAX_CRYPTO_DATA_SIZE) { rtp_send_piece(session, &header, data + sent, rdata, piece); sent += piece; header.offset_lower = (uint16_t)sent; header.offset_full = sent; // raw data offset, without any header } /* Send remaining */ piece = (uint16_t)(length - sent); if (piece != 0) { rtp_send_piece(session, &header, data + sent, rdata, piece); } } ++session->sequnum; return 0; }