Files
tomato/toxav/rtp_test.cc
Green Sky e95f2cbb1c Squashed 'external/toxcore/c-toxcore/' changes from e58eb27a8..1828c5356
1828c5356 fix(toxav): remove extra copy of video frame on encode
b66b8ded6 refactor: improve group stability, moderation determinism, and DHT dual-stack handling
4fbd7c10a fix(toxav): fix heap buffer overflow in RTP video packet handling
809fe8c78 refactor(tox): make the `#define` consts int literals.
50d242a37 refactor(toxav): improve MSI safety and testability
da1c13a2f fix(toxav): harden video processing and fix large frame handling
472825288 fix(toxav): fix multiple logic bugs in audio module
dc963d9a9 fix(toxav): fix multiple bugs in bandwidth controller and add tests
3bf5778ef refactor(toxav): split out RTP module and add exhaustive unit tests
b79b7d436 fix(autotools): add tox_log_level.h to public headers list
ea2e34ff2 chore: Disable cirrus. We're out of quota again.
b449ea2ed chore(ci): update azure runner image to windows-2022 windows-2019 is EOL
e115b136d refactor: Make add_to_list non-recursive.
REVERT: e58eb27a8 fix(toxav): remove extra copy of video frame on encode Tested and works, but there might be alignment issues and other stuff.

git-subtree-dir: external/toxcore/c-toxcore
git-subtree-split: 1828c5356b2daf1d5f680854e776d74b181d268c
2026-01-01 19:15:15 +01:00

954 lines
31 KiB
C++

#include "rtp.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <cstring>
#include <vector>
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/net_crypto.h"
#include "../toxcore/os_memory.h"
namespace {
struct MockSessionData {
MockSessionData();
~MockSessionData();
std::vector<std::vector<uint8_t>> sent_packets;
std::vector<std::vector<uint8_t>> received_frames;
std::vector<uint16_t> received_frame_lengths;
std::vector<uint32_t> received_32bit_lengths;
std::vector<uint32_t> received_full_lengths;
std::vector<uint16_t> received_sequnums;
std::vector<uint8_t> received_pts;
std::vector<uint64_t> received_flags;
uint32_t total_bytes_received = 0;
uint32_t total_bytes_lost = 0;
};
MockSessionData::MockSessionData() = default;
MockSessionData::~MockSessionData() = default;
static int mock_send_packet(void *user_data, const uint8_t *data, uint16_t length)
{
auto *sd = static_cast<MockSessionData *>(user_data);
sd->sent_packets.emplace_back(data, data + length);
return 0;
}
static int mock_m_cb(const Mono_Time * /*mono_time*/, void *cs, RTPMessage *msg)
{
auto *sd = static_cast<MockSessionData *>(cs);
sd->received_pts.push_back(rtp_message_pt(msg));
sd->received_flags.push_back(rtp_message_flags(msg));
const uint8_t *data = rtp_message_data(msg);
uint32_t len = rtp_message_len(msg);
uint32_t full_len = rtp_message_data_length_full(msg);
// If full_len is not set (old protocol), use len
uint32_t actual_len = (full_len > 0) ? full_len : len;
sd->received_frames.emplace_back(data, data + actual_len);
sd->received_frame_lengths.push_back(static_cast<uint16_t>(len));
sd->received_32bit_lengths.push_back(len);
sd->received_full_lengths.push_back(full_len);
sd->received_sequnums.push_back(rtp_message_sequnum(msg));
std::free(msg);
return 0;
}
static void mock_add_recv(void *user_data, uint32_t bytes)
{
auto *sd = static_cast<MockSessionData *>(user_data);
sd->total_bytes_received += bytes;
}
static void mock_add_lost(void *user_data, uint32_t bytes)
{
auto *sd = static_cast<MockSessionData *>(user_data);
sd->total_bytes_lost += bytes;
}
class RtpPublicTest : public ::testing::Test {
protected:
void SetUp() override
{
const Memory *mem = os_memory();
log = logger_new(mem);
mono_time = mono_time_new(mem, nullptr, nullptr);
mono_time_update(mono_time);
}
void TearDown() override
{
const Memory *mem = os_memory();
mono_time_free(mem, mono_time);
logger_kill(log);
}
Logger *log;
Mono_Time *mono_time;
};
TEST_F(RtpPublicTest, BasicAudioSendReceive)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
ASSERT_NE(session, nullptr);
uint8_t data[] = "Hello RTP";
rtp_send_data(log, session, data, sizeof(data), false);
ASSERT_EQ(sd.sent_packets.size(), 1);
EXPECT_EQ(sd.sent_packets[0][0], RTP_TYPE_AUDIO);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
ASSERT_EQ(sd.received_frames.size(), 1);
EXPECT_EQ(sd.received_frames[0].size(), sizeof(data));
EXPECT_STREQ(reinterpret_cast<const char *>(sd.received_frames[0].data()), "Hello RTP");
EXPECT_EQ(sd.received_pts[0], RTP_TYPE_AUDIO % 128);
EXPECT_EQ(sd.received_flags[0], 0);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, LargeVideoFrameFragmentation)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
// Frame larger than MAX_CRYPTO_DATA_SIZE
const uint32_t frame_size = MAX_CRYPTO_DATA_SIZE + 500;
std::vector<uint8_t> data(frame_size);
for (uint32_t i = 0; i < frame_size; ++i)
data[i] = i & 0xFF;
rtp_send_data(log, session, data.data(), frame_size, true);
// Should be at least 2 packets
ASSERT_GE(sd.sent_packets.size(), 2);
// Receive packets in order
for (const auto &pkt : sd.sent_packets) {
rtp_receive_packet(session, pkt.data(), pkt.size());
}
ASSERT_EQ(sd.received_frames.size(), 1);
EXPECT_EQ(sd.received_frames[0], data);
EXPECT_EQ(sd.received_pts[0], RTP_TYPE_VIDEO % 128);
EXPECT_TRUE(sd.received_flags[0] & RTP_KEY_FRAME);
EXPECT_TRUE(sd.received_flags[0] & RTP_LARGE_FRAME);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, OutOfOrderVideoPackets)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
const uint32_t frame_size = MAX_CRYPTO_DATA_SIZE + 100;
std::vector<uint8_t> data(frame_size, 0x55);
rtp_send_data(log, session, data.data(), frame_size, false);
ASSERT_EQ(sd.sent_packets.size(), 2);
// Receive last packet first
rtp_receive_packet(session, sd.sent_packets[1].data(), sd.sent_packets[1].size());
EXPECT_EQ(sd.received_frames.size(), 0);
// Receive first packet
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
ASSERT_EQ(sd.received_frames.size(), 1);
EXPECT_EQ(sd.received_frames[0].size(), frame_size);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HandlingInvalidPackets)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
// Packet too short to even contain the Tox packet ID
rtp_receive_packet(session, nullptr, 0);
// Packet too short (less than RTP_HEADER_SIZE + 1)
uint8_t short_pkt[10] = {RTP_TYPE_AUDIO};
rtp_receive_packet(session, short_pkt, sizeof(short_pkt));
// Wrong packet ID (Tox level)
uint8_t wrong_id[RTP_HEADER_SIZE + 10];
std::memset(wrong_id, 0, sizeof(wrong_id));
wrong_id[0] = RTP_TYPE_VIDEO; // Session expects AUDIO
rtp_receive_packet(session, wrong_id, sizeof(wrong_id));
EXPECT_EQ(sd.received_frames.size(), 0);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, ReceiveActiveToggle)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
EXPECT_TRUE(rtp_session_is_receiving_active(session));
rtp_stop_receiving_mark(session);
EXPECT_FALSE(rtp_session_is_receiving_active(session));
rtp_allow_receiving_mark(session);
EXPECT_TRUE(rtp_session_is_receiving_active(session));
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, SsrcAccessors)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
rtp_session_set_ssrc(session, 0x12345678);
EXPECT_EQ(rtp_session_get_ssrc(session), 0x12345678);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, LargeAudioFragmentationOldProtocol)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
// Audio doesn't use RTP_LARGE_FRAME, so it uses the old 16-bit offset/length fields
const uint32_t frame_size = MAX_CRYPTO_DATA_SIZE + 500;
std::vector<uint8_t> data(frame_size, 0x44);
rtp_send_data(log, session, data.data(), frame_size, false);
ASSERT_GE(sd.sent_packets.size(), 2);
for (const auto &pkt : sd.sent_packets) {
rtp_receive_packet(session, pkt.data(), pkt.size());
}
ASSERT_EQ(sd.received_frames.size(), 1);
EXPECT_EQ(sd.received_frames[0].size(), frame_size);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, WorkBufferEvictionAndKeyframePreservation)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
struct TimeMock {
uint64_t t;
} tm = {1000};
auto time_cb = [](void *ud) -> uint64_t { return static_cast<TimeMock *>(ud)->t; };
mono_time_set_current_time_callback(mono_time, time_cb, &tm);
mono_time_update(mono_time);
// USED_RTP_WORKBUFFER_COUNT is 3.
// 1. Start a keyframe (frame 0) but don't finish it.
const uint32_t frame_size = MAX_CRYPTO_DATA_SIZE + 100;
std::vector<uint8_t> kf_data(frame_size, 0x11);
rtp_send_data(log, session, kf_data.data(), frame_size, true);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
sd.sent_packets.clear();
// 2. Start two interframes (frames 1 and 2) but don't finish them.
for (int i = 0; i < 2; ++i) {
tm.t += 1;
mono_time_update(mono_time);
std::vector<uint8_t> if_data(frame_size, 0x20 + i);
rtp_send_data(log, session, if_data.data(), frame_size, false);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
sd.sent_packets.clear();
}
// Now work buffer has 3 slots: [KF(part), IF1(part), IF2(part)]
EXPECT_EQ(sd.received_frames.size(), 0);
// 3. Start another interframe (frame 3).
// Since slot 0 is a KEYFRAME and it's not old yet (tm.t=1002, KF.t=1000, age=2ms < 15ms),
// and it's not finished, it should be kept.
// The new IF should be DROPPED because there's no space and slot 0 is a protected KF.
tm.t += 1;
mono_time_update(mono_time);
std::vector<uint8_t> if3_data(frame_size, 0x33);
rtp_send_data(log, session, if3_data.data(), frame_size, false);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
sd.sent_packets.clear();
EXPECT_EQ(sd.received_frames.size(), 0);
// 4. Advance time by 20ms (> VIDEO_KEEP_KEYFRAME_IN_BUFFER_FOR_MS = 15).
// Now slot 0 (the KF) is old relative to the new incoming frame's timestamp.
tm.t += 20;
mono_time_update(mono_time);
// 5. Start another frame (frame 4).
// Now the old KF should be evicted and processed (sent to callback), making room.
std::vector<uint8_t> if4_data(frame_size, 0x44);
rtp_send_data(log, session, if4_data.data(), frame_size, false);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
// We expect the KF to have been delivered now.
ASSERT_GE(sd.received_frames.size(), 1);
EXPECT_EQ(sd.received_frames[0][0], 0x11);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, BwcReporting)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd,
mock_add_recv, mock_add_lost, &sd, &sd, mock_m_cb);
uint8_t data[] = "test";
// DISMISS_FIRST_LOST_VIDEO_PACKET_COUNT is 10.
// Packets 1-9 are dismissed. Packet 10 is reported.
for (int i = 0; i < 10; ++i) {
sd.sent_packets.clear();
rtp_send_data(log, session, data, sizeof(data), false);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
}
// Packet 10 should have been the first one reported.
EXPECT_EQ(sd.total_bytes_received, sizeof(data));
EXPECT_EQ(sd.total_bytes_lost, 0);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, OldProtocolEdgeCases)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// 1. Multipart message interrupted by a newer message.
const uint32_t large_size = 5000;
std::vector<uint8_t> data(large_size, 0xAA);
rtp_send_data(log, session, data.data(), large_size, false);
ASSERT_GE(sd.sent_packets.size(), 2);
// Receive only the first part of the first message
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
EXPECT_EQ(sd.received_frames.size(), 0);
// Send a second message (newer)
std::vector<uint8_t> data2 = {0x1, 0x2, 0x3};
rtp_send_data(log, session, data2.data(), data2.size(), false);
// The second message is the last one in sent_packets.
rtp_receive_packet(session, sd.sent_packets.back().data(), sd.sent_packets.back().size());
// The first (incomplete) message should have been pushed to mcb when the second one arrived.
ASSERT_EQ(sd.received_frames.size(), 2);
EXPECT_LT(sd.received_frame_lengths[0], large_size);
EXPECT_EQ(sd.received_pts[0], RTP_TYPE_AUDIO % 128);
EXPECT_EQ(sd.received_frame_lengths[1], static_cast<uint16_t>(data2.size()));
// 2. Discarding old message part
sd.received_frames.clear();
sd.received_frame_lengths.clear();
sd.received_full_lengths.clear();
sd.received_pts.clear();
// Send a very new message.
std::vector<uint8_t> data3 = {0xDE, 0xAD};
rtp_send_data(log, session, data3.data(), data3.size(), false);
rtp_receive_packet(session, sd.sent_packets.back().data(), sd.sent_packets.back().size());
EXPECT_EQ(sd.received_frames.size(), 1);
// Now try to "receive" an old part of message 1 (Index 1)
rtp_receive_packet(session, sd.sent_packets[1].data(), sd.sent_packets[1].size());
// It should be discarded because it's older than the current session state.
EXPECT_EQ(sd.received_frames.size(), 1);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, MoreInvalidPackets)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// Get a valid packet to start with
uint8_t data[] = "test";
rtp_send_data(log, session, data, sizeof(data), false);
std::vector<uint8_t> valid_pkt = sd.sent_packets[0];
sd.sent_packets.clear();
// 1. RTPHeader packet type and Tox protocol packet type do not agree
std::vector<uint8_t> bad_pkt_1 = valid_pkt;
bad_pkt_1[0] = RTP_TYPE_AUDIO; // Tox ID says AUDIO, but header (byte 2) still says VIDEO
rtp_receive_packet(session, bad_pkt_1.data(), bad_pkt_1.size());
EXPECT_EQ(sd.received_frames.size(), 0);
// 2. RTPHeader packet type does not match session payload type
// Create an AUDIO session and send it the valid VIDEO packet
RTPSession *session_audio = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
nullptr, nullptr, nullptr, &sd, mock_m_cb);
rtp_receive_packet(session_audio, valid_pkt.data(), valid_pkt.size());
EXPECT_EQ(sd.received_frames.size(), 0);
rtp_kill(log, session_audio);
// 3. Invalid video packet: offset >= length
// From rtp.c, offset_full is at byte 20 and data_length_full at byte 24 of the RTP header.
// The RTP header starts at index 1 of the packet.
std::vector<uint8_t> bad_pkt_3 = valid_pkt;
// Set offset (bytes 21-24) to be equal to length (bytes 25-28)
// For a small packet, both are usually 0 and sizeof(data) respectively.
// Let's just make offset very large.
bad_pkt_3[1 + 20] = 0xFF;
bad_pkt_3[1 + 21] = 0xFF;
rtp_receive_packet(session, bad_pkt_3.data(), bad_pkt_3.size());
EXPECT_EQ(sd.received_frames.size(), 0);
// 4. Invalid old protocol packet: offset >= length
// offset_lower is at byte 76, data_length_lower at byte 78 of the RTP header.
RTPSession *session_audio2 = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd,
nullptr, nullptr, nullptr, &sd, mock_m_cb);
rtp_send_data(log, session_audio2, data, sizeof(data), false);
std::vector<uint8_t> audio_pkt = sd.sent_packets[0];
sd.sent_packets.clear();
std::vector<uint8_t> bad_pkt_4 = audio_pkt;
// Set offset_lower (byte 1 + 76) > data_length_lower (byte 1 + 78)
bad_pkt_4[1 + 76] = 0x01; // offset = 256
bad_pkt_4[1 + 77] = 0x00;
bad_pkt_4[1 + 78] = 0x00; // length = 10
bad_pkt_4[1 + 79] = 0x0A;
rtp_receive_packet(session_audio2, bad_pkt_4.data(), bad_pkt_4.size());
EXPECT_EQ(sd.received_frames.size(), 0);
rtp_kill(log, session_audio2);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, VideoJitterBufferEdgeCases)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// Use a large frame size to force fragmentation and keep slots occupied
const uint32_t frame_size = MAX_CRYPTO_DATA_SIZE + 100;
std::vector<uint8_t> data(frame_size, 0);
// Advancing time for subsequent frames
struct TimeMock {
uint64_t t;
} tm = {1000};
auto time_cb = [](void *ud) -> uint64_t { return static_cast<TimeMock *>(ud)->t; };
mono_time_set_current_time_callback(mono_time, time_cb, &tm);
mono_time_update(mono_time);
// 1. Packet too old for work buffer
rtp_send_data(log, session, data.data(), frame_size, false); // Time 1000ms
std::vector<uint8_t> old_pkt = sd.sent_packets[0];
// Receive only first part to keep slot occupied
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
EXPECT_EQ(sd.received_frames.size(), 0);
sd.sent_packets.clear();
// Send a newer frame by advancing time
tm.t = 2000;
mono_time_update(mono_time);
rtp_send_data(log, session, data.data(), frame_size, false); // Time 2000ms
// Receive first part of this one too. Now we have two slots occupied.
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
EXPECT_EQ(sd.received_frames.size(), 0);
sd.sent_packets.clear();
// Now try to send the old packet again. It should be rejected because
// it's older than the most recent frame in the buffer.
rtp_receive_packet(session, old_pkt.data(), old_pkt.size());
EXPECT_EQ(sd.received_frames.size(), 0);
// 2. Interframe waiting for keyframe in slot 0
rtp_kill(log, session);
sd.received_frames.clear();
session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr, nullptr,
nullptr, &sd, mock_m_cb);
// Fill slot 0 with an incomplete Keyframe
std::vector<uint8_t> kf_data(frame_size, 0x11);
tm.t = 3000;
mono_time_update(mono_time);
rtp_send_data(log, session, kf_data.data(), frame_size, true);
// Receive only first part
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
sd.sent_packets.clear();
// Now send a complete Interframe
std::vector<uint8_t> if_data(10, 0x22);
tm.t += 1;
mono_time_update(mono_time);
rtp_send_data(log, session, if_data.data(), if_data.size(), false);
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
// The interframe should be in slot 1, but NOT processed because slot 0 is an incomplete KF
EXPECT_EQ(sd.received_frames.size(), 0);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, OldProtocolCorruption)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// 1. Packet claiming a smaller length than its payload.
// This triggers the condition that previously caused a DoS crash via
// an assertion failure in new_message().
uint8_t data[10] = {0};
rtp_send_data(log, session, data, sizeof(data), false);
std::vector<uint8_t> pkt = sd.sent_packets[0];
sd.sent_packets.clear();
// Modify data_length_lower (byte 1 + 78) to be 2, while payload is 10.
pkt[1 + 78] = 0x00;
pkt[1 + 79] = 0x02;
// This used to trigger an assertion failure (crash). Now it should return nullptr.
rtp_receive_packet(session, pkt.data(), pkt.size());
EXPECT_EQ(sd.received_frames.size(), 0);
// 2. Corruption check for an EXISTING multipart message.
const uint32_t multipart_size = 5000;
std::vector<uint8_t> multipart_data(multipart_size, 0xBB);
rtp_send_data(log, session, multipart_data.data(), multipart_size, false);
// Receive the first part
rtp_receive_packet(session, sd.sent_packets[0].data(), sd.sent_packets[0].size());
EXPECT_EQ(sd.received_frames.size(), 0);
// Now receive a corrupted second part that claims a weird offset
std::vector<uint8_t> corrupted_part = sd.sent_packets[1];
// offset_lower is at byte 76. Set it beyond data_length_lower.
corrupted_part[1 + 76] = 0xFF;
corrupted_part[1 + 77] = 0xFF;
rtp_receive_packet(session, corrupted_part.data(), corrupted_part.size());
// It should return early without pushing the message.
EXPECT_EQ(sd.received_frames.size(), 0);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HugeVideoFrameInternalLength)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// Frame larger than 64KB (uint16_t max)
const uint32_t huge_frame_size = 65540;
std::vector<uint8_t> data(huge_frame_size);
for (uint32_t i = 0; i < huge_frame_size; ++i) {
data[i] = static_cast<uint8_t>(i & 0xFF);
}
rtp_send_data(log, session, data.data(), huge_frame_size, false);
// Should be fragmented into many packets
ASSERT_GT(sd.sent_packets.size(), 1);
// Receive all packets
for (const auto &pkt : sd.sent_packets) {
rtp_receive_packet(session, pkt.data(), pkt.size());
}
ASSERT_EQ(sd.received_frames.size(), 1);
// This verifies that the internal 32-bit length is working correctly.
// We cast huge_frame_size to 16-bit to show what it would have been if it truncated.
EXPECT_NE(static_cast<uint16_t>(sd.received_32bit_lengths[0]), huge_frame_size);
EXPECT_EQ(sd.received_32bit_lengths[0], huge_frame_size);
EXPECT_EQ(sd.received_full_lengths[0], huge_frame_size);
EXPECT_EQ(sd.received_frames[0].size(), huge_frame_size);
EXPECT_EQ(sd.received_frames[0], data);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HeapBufferOverflowRaw)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// Manually construct a malicious packet.
// 1 byte ID + 80 bytes Header + 200 bytes Payload
const size_t header_size = 80;
const size_t payload_size = 200;
const size_t total_size = 1 + header_size + payload_size;
std::vector<uint8_t> pkt(total_size, 0);
// 0: Packet ID
pkt[0] = RTP_TYPE_VIDEO;
// 1: VE=2 (10xxxxxx) -> 0x80
pkt[1] = 0x80;
// 2: PT = RTP_TYPE_VIDEO % 128 (193 % 128 = 65 -> 0x41)
// MA=0
pkt[2] = 0x41;
// 13-20: Flags (64-bit)
// We need RTP_LARGE_FRAME (1<<0) and RTP_KEY_FRAME (1<<1) -> 0x03
// Stored in Big Endian. Last byte is 0x03.
pkt[20] = 0x03;
// 25-28: Data Length Full (32-bit Big Endian)
// We set this to 50 (0x32) to trick the allocator.
pkt[28] = 50;
// 81...: Payload
// Fill with 0x41 ('A')
std::fill(pkt.begin() + 81, pkt.end(), 0x41);
// Inject the malicious packet
rtp_receive_packet(session, pkt.data(), pkt.size());
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HeapBufferOverflow)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
// Common parameters
uint16_t sequnum = 100;
uint32_t timestamp = 12345;
uint32_t ssrc = 0x11223344;
// --- Packet 1: Small allocation ---
// data_length_full = 10
// offset_full = 0
// payload_len = 5
{
uint8_t packet[100];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_VIDEO; // Tox Packet ID
// RTP Header
uint8_t *h = &packet[1];
// Byte 0: VE=2 (0x80)
h[0] = 0x80;
// Byte 1: PT=0x41
h[1] = 0x41; // 65
// Bytes 2-3: Sequnum
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
// Bytes 4-7: Timestamp
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
// Bytes 8-11: SSRC
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
// Bytes 12-19: Flags (RTP_LARGE_FRAME = 1)
h[19] = 1;
// Bytes 20-23: Offset Full (0)
// 0
// Bytes 24-27: Data Length Full (10)
h[27] = 10;
// Bytes 28-31: Received Length Full (0)
// Offset Lower (at 76)
h[76] = 0;
h[77] = 0;
// Data Length Lower (at 78) -> 10
h[78] = 0;
h[79] = 10;
// Payload starts at 1 + RTP_HEADER_SIZE (80) = 81
// We set payload length to 5.
// Total packet size = 81 + 5 = 86
rtp_receive_packet(session, packet, 81 + 5);
}
// --- Packet 2: Exploit ---
// Same sequnum/timestamp -> same slot
// data_length_full = 1000 (Larger!)
// offset_full = 10
// payload_len = 100
//
// Logic check:
// data_length_full (1000) - offset_full (10) < payload_len (100) -> 990 < 100 -> False.
// Check passes.
//
// Memcpy to buf->data + 10. Buf was allocated with size 10. Writing 100
// bytes to offset 10 -> Overflow.
{
uint8_t packet[200];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_VIDEO;
uint8_t *h = &packet[1];
h[0] = 0x80;
h[1] = 0x41;
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
h[19] = 1; // Large frame
// Offset Full = 10
h[23] = 10;
// Data Length Full = 1000
h[26] = (1000 >> 8) & 0xFF;
h[27] = 1000 & 0xFF;
// Offset Lower (at 76) -> 10
h[76] = 0;
h[77] = 10;
// Data Length Lower (at 78) -> 1000
h[78] = (1000 >> 8) & 0xFF;
h[79] = 1000 & 0xFF;
// Payload starts at 81. Length 100.
// We fill it with 'A' to make the overflow obvious if inspected.
std::memset(&packet[81], 'A', 100);
rtp_receive_packet(session, packet, 81 + 100);
}
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, AudioHeapBufferOverflow)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
uint16_t sequnum = 100;
uint32_t timestamp = 12345;
uint32_t ssrc = 0x11223344;
uint8_t packet[200];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_AUDIO;
uint8_t *h = &packet[1];
h[0] = 0x80;
h[1] = 0x40; // 64 (Audio)
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
h[19] = 0; // Small frame (Audio)
// Offset Lower (at 76) -> 90
h[76] = 0;
h[77] = 90;
// Data Length Lower (at 78) -> 100
h[78] = 0;
h[79] = 100;
// Payload starts at 81. Length 20.
// Total size required = 90 + 20 = 110.
// Allocated size = 100.
// Overflow by 10 bytes.
std::memset(&packet[81], 'A', 20);
rtp_receive_packet(session, packet, 81 + 20);
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HeapBufferOverflowMultipartAudio)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_AUDIO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
uint16_t sequnum = 200;
uint32_t timestamp = 67890;
uint32_t ssrc = 0x55667788;
uint16_t total_len = 100;
// --- Packet 1: Allocate buffer ---
// data_length_lower = 100
// offset_lower = 0
// payload_len = 10
{
uint8_t packet[200];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_AUDIO;
uint8_t *h = &packet[1];
h[0] = 0x80;
h[1] = 0x40; // Audio
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
h[19] = 0;
// Offset Lower (at 76) -> 0
h[76] = 0;
h[77] = 0;
// Data Length Lower (at 78) -> 100
h[78] = (total_len >> 8) & 0xFF;
h[79] = total_len & 0xFF;
// Payload len 10
std::memset(&packet[81], 'A', 10);
rtp_receive_packet(session, packet, 81 + 10);
}
// --- Packet 2: Overflow ---
// offset_lower = 95
// payload_len = 10
//
// Check 1: total (100) - received (10) = 90. 90 >= 10. Safe.
// Check 2: total (100) > offset (95). Safe.
// Write: 95 + 10 = 105. Overflow.
{
uint8_t packet[200];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_AUDIO;
uint8_t *h = &packet[1];
h[0] = 0x80;
h[1] = 0x40;
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
h[19] = 0;
// Offset Lower (at 76) -> 95
h[76] = 0;
h[77] = 95;
// Data Length Lower (at 78) -> 100
h[78] = (total_len >> 8) & 0xFF;
h[79] = total_len & 0xFF;
// Payload len 10
std::memset(&packet[81], 'B', 10);
rtp_receive_packet(session, packet, 81 + 10);
}
rtp_kill(log, session);
}
TEST_F(RtpPublicTest, HeapBufferOverflowLogRead)
{
MockSessionData sd;
RTPSession *session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, mock_send_packet, &sd, nullptr,
nullptr, nullptr, &sd, mock_m_cb);
uint16_t sequnum = 123;
uint32_t timestamp = 99999;
uint32_t ssrc = 0x88776655;
// Packet with data_length_full = 1.
// The logger tries to read data[0] and data[1].
// data[1] will be out of bounds if only 1 byte is allocated.
uint8_t packet[100];
std::memset(packet, 0, sizeof(packet));
packet[0] = RTP_TYPE_VIDEO;
uint8_t *h = &packet[1];
h[0] = 0x80;
h[1] = 0x41; // Video
h[2] = (sequnum >> 8) & 0xFF;
h[3] = sequnum & 0xFF;
h[4] = (timestamp >> 24) & 0xFF;
h[5] = (timestamp >> 16) & 0xFF;
h[6] = (timestamp >> 8) & 0xFF;
h[7] = timestamp & 0xFF;
h[8] = (ssrc >> 24) & 0xFF;
h[9] = (ssrc >> 16) & 0xFF;
h[10] = (ssrc >> 8) & 0xFF;
h[11] = ssrc & 0xFF;
h[19] = 1; // Large frame
// Offset Full = 0
h[23] = 0;
// Data Length Full = 1
h[26] = 0;
h[27] = 1;
// Offset Lower (at 76) -> 0
h[76] = 0;
h[77] = 0;
// Data Length Lower (at 78) -> 1
h[78] = 0;
h[79] = 1;
// Payload starts at 81. Length 1.
packet[81] = 0xCC;
rtp_receive_packet(session, packet, 81 + 1);
rtp_kill(log, session);
}
} // namespace