#include "video.h" #include #include #include #include #include "../toxcore/logger.h" #include "../toxcore/mono_time.h" #include "../toxcore/network.h" #include "../toxcore/os_memory.h" #include "av_test_support.hh" #include "rtp.h" namespace { using VideoTest = AvTest; TEST_F(VideoTest, BasicNewKill) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); vc_kill(vc); } TEST_F(VideoTest, EncodeDecodeLoop) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = recv_rtp; uint16_t width = 320; uint16_t height = 240; uint32_t bitrate = 500; ASSERT_EQ(vc_reconfigure_encoder(vc, bitrate, width, height, -1), 0); std::vector y(width * height, 128); std::vector u((width / 2) * (height / 2), 64); std::vector v((width / 2) * (height / 2), 192); ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(), VC_EFLAG_FORCE_KF), 0); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { int rc = rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe); ASSERT_EQ(rc, 0); } vc_iterate(vc); ASSERT_EQ(data.friend_number, 123u); ASSERT_EQ(data.width, width); ASSERT_EQ(data.height, height); ASSERT_FALSE(data.y.empty()); rtp_kill(log, send_rtp); rtp_kill(log, recv_rtp); vc_kill(vc); } TEST_F(VideoTest, EncodeDecodeSequence) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = recv_rtp; uint16_t width = 320; uint16_t height = 240; uint32_t bitrate = 2000; ASSERT_EQ(vc_reconfigure_encoder(vc, bitrate, width, height, -1), 0); for (int i = 0; i < 20; ++i) { std::vector y(width * height); std::vector u((width / 2) * (height / 2)); std::vector v((width / 2) * (height / 2)); // Background std::fill(y.begin(), y.end(), 16); std::fill(u.begin(), u.end(), 128); std::fill(v.begin(), v.end(), 128); // Moving square int sq_size = 64; int x0 = (i * 8) % (width - sq_size); int y0 = (i * 4) % (height - sq_size); for (int r = 0; r < sq_size; ++r) { for (int c = 0; c < sq_size; ++c) { y[(y0 + r) * width + (x0 + c)] = 200; } } ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(), i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE), 0); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe); } vc_iterate(vc); ASSERT_EQ(data.width, width); ASSERT_EQ(data.height, height); double mse = data.calculate_mse(y); // Expect MSE to be reasonably low for high bitrate EXPECT_LT(mse, 100.0) << "Frame " << i << " MSE too high: " << mse; } rtp_kill(log, send_rtp); rtp_kill(log, recv_rtp); vc_kill(vc); } TEST_F(VideoTest, EncodeDecodeResolutionChange) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = recv_rtp; uint16_t widths[] = {320, 160, 480}; uint16_t heights[] = {240, 120, 360}; for (int res = 0; res < 3; ++res) { uint16_t width = widths[res]; uint16_t height = heights[res]; ASSERT_EQ(vc_reconfigure_encoder(vc, 2000, width, height, -1), 0); for (int i = 0; i < 5; ++i) { std::vector y(width * height); std::vector u((width / 2) * (height / 2)); std::vector v((width / 2) * (height / 2)); std::fill(y.begin(), y.end(), static_cast((res * 50 + i * 10) % 256)); std::fill(u.begin(), u.end(), 128); std::fill(v.begin(), v.end(), 128); ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(), i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE), 0); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe); } vc_iterate(vc); ASSERT_EQ(data.width, width); ASSERT_EQ(data.height, height); double mse = data.calculate_mse(y); EXPECT_LT(mse, 100.0) << "Res " << res << " Frame " << i << " MSE too high: " << mse; } } rtp_kill(log, send_rtp); rtp_kill(log, recv_rtp); vc_kill(vc); } TEST_F(VideoTest, EncodeDecodeBitrateImpact) { uint32_t bitrates[] = {100, 500, 2000}; double mses[3]; uint16_t width = 320; uint16_t height = 240; for (int b = 0; b < 3; ++b) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = recv_rtp; ASSERT_EQ(vc_reconfigure_encoder(vc, bitrates[b], width, height, -1), 0); double total_mse = 0; int frames = 10; for (int i = 0; i < frames; ++i) { std::vector y(width * height); std::vector u((width / 2) * (height / 2)); std::vector v((width / 2) * (height / 2)); for (size_t j = 0; j < y.size(); ++j) y[j] = static_cast((j + i * 10) % 256); std::fill(u.begin(), u.end(), 128); std::fill(v.begin(), v.end(), 128); ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(), i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE), 0); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe); } vc_iterate(vc); total_mse += data.calculate_mse(y); } mses[b] = total_mse / frames; rtp_kill(log, send_rtp); rtp_kill(log, recv_rtp); vc_kill(vc); } // Quality should generally improve (MSE decrease) as bitrate increases. // 100kbps should have significantly higher MSE than 2000kbps for this complex pattern. EXPECT_GT(mses[0], mses[1]); EXPECT_GT(mses[1], mses[2]); printf("MSE results: 100kbps: %f, 500kbps: %f, 2000kbps: %f\n", mses[0], mses[1], mses[2]); } TEST_F(VideoTest, ReconfigureEncoder) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); // Initial reconfigure ASSERT_EQ(vc_reconfigure_encoder(vc, 500, 320, 240, -1), 0); // Change bitrate and resolution ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, 640, 480, -1), 0); std::vector y(640 * 480, 128); std::vector u(320 * 240, 64); std::vector v(320 * 240, 192); ASSERT_EQ(vc_encode(vc, 640, 480, y.data(), u.data(), v.data(), VC_EFLAG_NONE), 0); vc_kill(vc); } TEST_F(VideoTest, GetLcfd) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); // Default lcfd is 60 in video.c EXPECT_EQ(vc_get_lcfd(vc), 60u); vc_kill(vc); } TEST_F(VideoTest, QueueInvalidMessage) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; // Create an audio RTP session but try to queue to video session RTPSession *audio_rtp = rtp_new(log, RTP_TYPE_AUDIO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); RTPSession *video_recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = video_recv_rtp; std::vector dummy_audio(100, 0); int rc = rtp_send_data( log, audio_rtp, dummy_audio.data(), static_cast(dummy_audio.size()), false); ASSERT_EQ(rc, 0); // Iterate should NOT trigger callback because payload type was wrong vc_iterate(vc); EXPECT_EQ(data.width, 0u); rtp_kill(log, audio_rtp); rtp_kill(log, video_recv_rtp); vc_kill(vc); } TEST_F(VideoTest, ReconfigureOptimizations) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); // 1. Reconfigure with same values (should do nothing) // vc_new initializes encoder with 800x600 and 5000 bitrate. EXPECT_EQ(vc_reconfigure_encoder(vc, 5000, 800, 600, -1), 0); // 2. Reconfigure with only bitrate change EXPECT_EQ(vc_reconfigure_encoder(vc, 2000, 800, 600, -1), 0); // 3. Reconfigure with kf_max_dist > 1 (triggers re-init and kf_max_dist branch) EXPECT_EQ(vc_reconfigure_encoder(vc, 2000, 800, 600, 60), 0); vc_kill(vc); } TEST_F(VideoTest, LcfdAndSpecialPackets) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *video_recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = video_recv_rtp; // 1. Test lcfd update tm.t += 50; // Advance time by 50ms mono_time_update(mono_time); std::vector dummy_frame(10, 0); rtp_send_data( log, video_recv_rtp, dummy_frame.data(), static_cast(dummy_frame.size()), true); // lcfd should be updated. Initial linfts was set at vc_new (tm.t=1000). // Now tm.t is 1050. t_lcfd = 1050 - 1000 = 50. EXPECT_EQ(vc_get_lcfd(vc), 50u); // 2. Test lcfd threshold (t_lcfd > 100 should be ignored) tm.t += 200; mono_time_update(mono_time); rtp_send_data( log, video_recv_rtp, dummy_frame.data(), static_cast(dummy_frame.size()), true); EXPECT_EQ(vc_get_lcfd(vc), 50u); // Should still be 50 // 3. Test dummy packet PT = (RTP_TYPE_VIDEO + 2) % 128 RTPSession *dummy_rtp = rtp_new(log, (RTP_TYPE_VIDEO + 2), mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = dummy_rtp; rtp_send_data( log, dummy_rtp, dummy_frame.data(), static_cast(dummy_frame.size()), false); // Should return 0 but do nothing (logged as "Got dummy!") // 4. Test GetQueueMutex EXPECT_NE(vc_get_queue_mutex(vc), nullptr); rtp_kill(log, video_recv_rtp); rtp_kill(log, dummy_rtp); vc_kill(vc); } TEST_F(VideoTest, MultiReconfigureEncode) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); for (int i = 0; i < 5; ++i) { uint16_t w = static_cast(160 + (i * 16)); uint16_t h = static_cast(120 + (i * 16)); std::vector y(static_cast(w) * h, 128); std::vector u((static_cast(w) / 2) * (h / 2), 64); std::vector v((static_cast(w) / 2) * (h / 2), 192); ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, w, h, -1), 0); ASSERT_EQ(vc_encode(vc, w, h, y.data(), u.data(), v.data(), VC_EFLAG_NONE), 0); } vc_kill(vc); } TEST_F(VideoTest, ReconfigureFailDoS) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); // Trigger failure by passing invalid resolution (0) // This currently destroys the encoder. ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, 0, 0, -1), -1); // Attempt to encode. This is expected to crash because vc->encoder is destroyed. std::vector y(320 * 240, 128); std::vector u(160 * 120, 64); std::vector v(160 * 120, 192); // This call will crash in the current unfixed code. vc_encode(vc, 320, 240, y.data(), u.data(), v.data(), VC_EFLAG_NONE); vc_kill(vc); } TEST_F(VideoTest, LyingLengthOOB) { VideoTestData data; VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data); ASSERT_NE(vc, nullptr); RtpMock rtp_mock; RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); rtp_mock.recv_session = recv_rtp; // Craft a malicious RTP packet uint16_t payload_len = 10; uint8_t packet[RTP_HEADER_SIZE + 11]; // +1 for Tox ID memset(packet, 0, sizeof(packet)); // Tox ID packet[0] = static_cast(RTP_TYPE_VIDEO); auto pack_u16 = [](uint8_t *p, uint16_t v) { p[0] = static_cast(v >> 8); p[1] = static_cast(v & 0xff); }; auto pack_u32 = [](uint8_t *p, uint32_t v) { p[0] = static_cast(v >> 24); p[1] = static_cast((v >> 16) & 0xff); p[2] = static_cast((v >> 8) & 0xff); p[3] = static_cast(v & 0xff); }; auto pack_u64 = [&](uint8_t *p, uint64_t v) { pack_u32(p, static_cast(v >> 32)); pack_u32(p + 4, static_cast(v & 0xffffffff)); }; // RTP Header starts at packet[1] packet[1] = 2 << 6; // ve = 2 packet[2] = static_cast(RTP_TYPE_VIDEO % 128); pack_u16(packet + 3, 1); // sequnum pack_u32(packet + 5, 1000); // timestamp pack_u32(packet + 9, 0x12345678); // ssrc pack_u64(packet + 13, RTP_LARGE_FRAME); // flags pack_u32(packet + 21, 0); // offset_full pack_u32(packet + 25, 1000); // data_length_full (LYING! Actual is 10) pack_u32(packet + 29, 0); // received_length_full // Skip padding fields (11 * 4 = 44 bytes) pack_u16(packet + 77, 0); // offset_lower pack_u16(packet + 79, payload_len); // data_length_lower // Send the malicious packet rtp_receive_packet(recv_rtp, packet, sizeof(packet)); // Trigger vc_iterate. This will call vpx_codec_decode with length 1000. // This is expected to cause OOB read. vc_iterate(vc); rtp_kill(log, recv_rtp); vc_kill(vc); } } // namespace