Files
tomato/toxav/video_test.cc
Green Sky 9b36dd9d99 Squashed 'external/toxcore/c-toxcore/' changes from c9cdae001..9ed2fa80d
9ed2fa80d fix(toxav): remove extra copy of video frame on encode
de30cf3ad docs: Add new file kinds, that should be useful to all clients.
d5b5e879d fix(DHT): Correct node skipping logic timed out nodes.
30e71fe97 refactor: Generate event dispatch functions and add tox_events_dispatch.
8fdbb0b50 style: Format parameter lists in event handlers.
d00dee12b refactor: Add warning logs when losing chat invites.
b144e8db1 feat: Add a way to look up a file number by ID.
849281ea0 feat: Add a way to fetch groups by chat ID.
a2c177396 refactor: Harden event system and improve type safety.
8f5caa656 refactor: Add MessagePack string support to bin_pack.
34e8d5ad5 chore: Add GitHub CodeQL workflow and local Docker runner.
f7b068010 refactor: Add nullability annotations to event headers.
788abe651 refactor(toxav): Use system allocator for mutexes.
2e4b423eb refactor: Use specific typedefs for public API arrays.
2baf34775 docs(toxav): update idle iteration interval see 679444751876fa3882a717772918ebdc8f083354
2f87ac67b feat: Add Event Loop abstraction (Ev).
f8dfc38d8 test: Fix data race in ToxScenario virtual_clock.
38313921e test(TCP): Add regression test for TCP priority queue integrity.
f94a50d9a refactor(toxav): Replace mutable_mutex with dynamically allocated mutex.
ad054511e refactor: Internalize DHT structs and add debug helpers.
8b467cc96 fix: Prevent potential integer overflow in group chat handshake.
4962bdbb8 test: Improve TCP simulation and add tests
5f0227093 refactor: Allow nullable data in group chat handlers.
e97b18ea9 chore: Improve Windows Docker support.
b14943bbd refactor: Move Logger out of Messenger into Tox.
dd3136250 cleanup: Apply nullability qualifiers to C++ codebase.
1849f70fc refactor: Extract low-level networking code to net and os_network.
8fec75421 refactor: Delete tox_random, align on rng and os_random.
a03ae8051 refactor: Delete tox_memory, align on mem and os_memory.
4c88fed2c refactor: Use `std::` prefixes more consistently in C++ code.
72452f2ae test: Add some more tests for onion and shared key cache.
d5a51b09a cleanup: Use tox_attributes.h in tox_private.h and install it.
b6f5b9fc5 test: Add some benchmarks for various high level things.
8a8d02785 test(support): Introduce threaded Tox runner and simulation barrier
d68d1d095 perf(toxav): optimize audio and video intermediate buffers by keeping them around
REVERT: c9cdae001 fix(toxav): remove extra copy of video frame on encode

git-subtree-dir: external/toxcore/c-toxcore
git-subtree-split: 9ed2fa80d582c714d6bdde6a7648220a92cddff8
2026-02-01 14:26:52 +01:00

497 lines
17 KiB
C++

#include "video.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/network.h"
#include "../toxcore/os_memory.h"
#include "av_test_support.hh"
#include "rtp.h"
namespace {
// Run the TEST_F cases in this file on the AvTest fixture under the suite name
// VideoTest. AvTest is not defined in this file (presumably it comes from
// av_test_support.hh and supplies mem, log, mono_time and tm -- confirm).
using VideoTest = AvTest;
// Smoke test: a video session can be created and destroyed again without
// anything else happening in between.
TEST_F(VideoTest, BasicNewKill)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    vc_kill(session);
}
// Round trip of one forced keyframe: encode a flat 320x240 frame, push every
// produced packet through RTP, iterate the video session and check what the
// receive callback recorded.
TEST_F(VideoTest, EncodeDecodeLoop)
{
    VideoTestData received;
    VCSession *session
        = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &received);
    ASSERT_NE(session, nullptr);

    // Sender and receiver share one mock; the mock is pointed at the receiver.
    RtpMock mock;
    RTPSession *tx = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &mock, nullptr,
        nullptr, nullptr, session, RtpMock::video_cb);
    RTPSession *rx = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &mock, nullptr,
        nullptr, nullptr, session, RtpMock::video_cb);
    mock.recv_session = rx;

    const std::uint16_t width = 320;
    const std::uint16_t height = 240;
    const std::uint32_t bitrate = 500;
    ASSERT_EQ(vc_reconfigure_encoder(session, bitrate, width, height, -1), 0);

    // Constant-valued planes; the chroma planes are half size in each dimension.
    std::vector<std::uint8_t> luma(width * height, 128);
    std::vector<std::uint8_t> chroma_u((width / 2) * (height / 2), 64);
    std::vector<std::uint8_t> chroma_v((width / 2) * (height / 2), 192);

    ASSERT_EQ(vc_encode(session, width, height, luma.data(), chroma_u.data(), chroma_v.data(),
                  VC_EFLAG_FORCE_KF),
        0);
    vc_increment_frame_counter(session);

    // Drain every packet the encoder produced for this frame.
    std::uint8_t *chunk;
    std::uint32_t chunk_size;
    bool keyframe;
    for (;;) {
        if (!vc_get_cx_data(session, &chunk, &chunk_size, &keyframe)) {
            break;
        }
        const int send_result = rtp_send_data(log, tx, chunk, chunk_size, keyframe);
        ASSERT_EQ(send_result, 0);
    }

    vc_iterate(session);

    ASSERT_EQ(received.friend_number, 123u);
    ASSERT_EQ(received.width, width);
    ASSERT_EQ(received.height, height);
    ASSERT_FALSE(received.y.empty());

    rtp_kill(log, tx);
    rtp_kill(log, rx);
    vc_kill(session);
}
// Streams 20 frames of a bright 64x64 square moving over a dark background
// through encode -> RTP -> vc_iterate, and after every frame compares the
// frame seen by the receive callback with the source luma plane using
// VideoTestData::calculate_mse.
TEST_F(VideoTest, EncodeDecodeSequence)
{
    VideoTestData data;
    VCSession *vc = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &data);
    ASSERT_NE(vc, nullptr);

    // Sender and receiver share one mock; the mock is pointed at the receiver.
    RtpMock rtp_mock;
    RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
        nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
        nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    rtp_mock.recv_session = recv_rtp;

    std::uint16_t width = 320;
    std::uint16_t height = 240;
    std::uint32_t bitrate = 2000;
    ASSERT_EQ(vc_reconfigure_encoder(vc, bitrate, width, height, -1), 0);

    for (int i = 0; i < 20; ++i) {
        std::vector<std::uint8_t> y(width * height);
        std::vector<std::uint8_t> u((width / 2) * (height / 2));
        std::vector<std::uint8_t> v((width / 2) * (height / 2));

        // Background
        std::fill(y.begin(), y.end(), 16);
        std::fill(u.begin(), u.end(), 128);
        std::fill(v.begin(), v.end(), 128);

        // Moving square; the modulo keeps it fully inside the frame.
        int sq_size = 64;
        int x0 = (i * 8) % (width - sq_size);
        int y0 = (i * 4) % (height - sq_size);
        for (int r = 0; r < sq_size; ++r) {
            for (int c = 0; c < sq_size; ++c) {
                y[(y0 + r) * width + (x0 + c)] = 200;
            }
        }

        // Only the first frame is forced to be a keyframe.
        ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
                      i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
            0);
        vc_increment_frame_counter(vc);

        std::uint8_t *pkt_data;
        std::uint32_t pkt_size;
        bool is_keyframe;
        while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
            // Check the send result (as EncodeDecodeLoop does) so a failed send is
            // reported where it happens instead of surfacing later as a dimension
            // or MSE mismatch.
            ASSERT_EQ(rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe), 0)
                << "Frame " << i << ": rtp_send_data failed";
        }

        vc_iterate(vc);

        ASSERT_EQ(data.width, width);
        ASSERT_EQ(data.height, height);

        double mse = data.calculate_mse(y);
        // Expect MSE to be reasonably low for high bitrate
        EXPECT_LT(mse, 100.0) << "Frame " << i << " MSE too high: " << mse;
    }

    rtp_kill(log, send_rtp);
    rtp_kill(log, recv_rtp);
    vc_kill(vc);
}
// Reconfigures one encoder through several resolutions in a row and, for each
// resolution, streams five flat frames through encode -> RTP -> vc_iterate,
// checking the dimensions and luma MSE recorded by the receive callback.
TEST_F(VideoTest, EncodeDecodeResolutionChange)
{
    VideoTestData data;
    VCSession *vc = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &data);
    ASSERT_NE(vc, nullptr);

    // Sender and receiver share one mock; the mock is pointed at the receiver.
    RtpMock rtp_mock;
    RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
        nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
        nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    rtp_mock.recv_session = recv_rtp;

    // widths[k] pairs with heights[k]; the loop bound is derived from the table
    // so adding a resolution cannot silently go untested.
    const std::uint16_t widths[] = {320, 160, 480};
    const std::uint16_t heights[] = {240, 120, 360};
    static_assert(sizeof(widths) == sizeof(heights), "every width needs a matching height");
    const int res_count = static_cast<int>(sizeof(widths) / sizeof(widths[0]));

    for (int res = 0; res < res_count; ++res) {
        std::uint16_t width = widths[res];
        std::uint16_t height = heights[res];

        ASSERT_EQ(vc_reconfigure_encoder(vc, 2000, width, height, -1), 0);

        for (int i = 0; i < 5; ++i) {
            std::vector<std::uint8_t> y(width * height);
            std::vector<std::uint8_t> u((width / 2) * (height / 2));
            std::vector<std::uint8_t> v((width / 2) * (height / 2));

            // Flat frame whose brightness differs per resolution and per frame.
            std::fill(y.begin(), y.end(), static_cast<std::uint8_t>((res * 50 + i * 10) % 256));
            std::fill(u.begin(), u.end(), 128);
            std::fill(v.begin(), v.end(), 128);

            // The first frame after each reconfigure is forced to be a keyframe.
            ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
                          i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
                0);
            vc_increment_frame_counter(vc);

            std::uint8_t *pkt_data;
            std::uint32_t pkt_size;
            bool is_keyframe;
            while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
                // Check the send result (as EncodeDecodeLoop does) so a failed send
                // is reported here rather than as a later dimension/MSE mismatch.
                ASSERT_EQ(rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe), 0)
                    << "Res " << res << " Frame " << i << ": rtp_send_data failed";
            }

            vc_iterate(vc);

            ASSERT_EQ(data.width, width);
            ASSERT_EQ(data.height, height);

            double mse = data.calculate_mse(y);
            EXPECT_LT(mse, 100.0) << "Res " << res << " Frame " << i << " MSE too high: " << mse;
        }
    }

    rtp_kill(log, send_rtp);
    rtp_kill(log, recv_rtp);
    vc_kill(vc);
}
// Encodes the same 10-frame gradient sequence at three bitrates, each with a
// fresh video session and RTP pair, and checks that the average luma MSE
// (VideoTestData::calculate_mse) strictly decreases as the bitrate goes up.
TEST_F(VideoTest, EncodeDecodeBitrateImpact)
{
    std::uint32_t bitrates[] = {100, 500, 2000};
    // Zero-initialised so no element is ever indeterminate.
    double mses[3] = {};

    std::uint16_t width = 320;
    std::uint16_t height = 240;

    for (int b = 0; b < 3; ++b) {
        VideoTestData data;
        VCSession *vc = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &data);
        ASSERT_NE(vc, nullptr);

        // Sender and receiver share one mock; the mock is pointed at the receiver.
        RtpMock rtp_mock;
        RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
            &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
        RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
            &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
        rtp_mock.recv_session = recv_rtp;

        ASSERT_EQ(vc_reconfigure_encoder(vc, bitrates[b], width, height, -1), 0);

        double total_mse = 0;
        int frames = 10;
        for (int i = 0; i < frames; ++i) {
            std::vector<std::uint8_t> y(width * height);
            std::vector<std::uint8_t> u((width / 2) * (height / 2));
            std::vector<std::uint8_t> v((width / 2) * (height / 2));

            // Repeating 0..255 ramp over the luma plane, shifted by 10 per frame.
            for (std::size_t j = 0; j < y.size(); ++j) {
                y[j] = static_cast<std::uint8_t>((j + i * 10) % 256);
            }
            std::fill(u.begin(), u.end(), 128);
            std::fill(v.begin(), v.end(), 128);

            // Only the first frame of each run is forced to be a keyframe.
            ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
                          i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
                0);
            vc_increment_frame_counter(vc);

            std::uint8_t *pkt_data;
            std::uint32_t pkt_size;
            bool is_keyframe;
            while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
                // Check the send result (as EncodeDecodeLoop does): a failed send
                // would otherwise skew the MSE comparison below without any hint
                // of the real cause.
                ASSERT_EQ(rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe), 0)
                    << "Bitrate " << bitrates[b] << " Frame " << i << ": rtp_send_data failed";
            }

            vc_iterate(vc);

            total_mse += data.calculate_mse(y);
        }

        mses[b] = total_mse / frames;

        rtp_kill(log, send_rtp);
        rtp_kill(log, recv_rtp);
        vc_kill(vc);
    }

    // Quality should generally improve (MSE decrease) as bitrate increases.
    // 100kbps should have significantly higher MSE than 2000kbps for this complex pattern.
    EXPECT_GT(mses[0], mses[1]);
    EXPECT_GT(mses[1], mses[2]);

    std::printf("MSE results: 100kbps: %f, 500kbps: %f, 2000kbps: %f\n", mses[0], mses[1], mses[2]);
}
// Reconfigures the encoder twice (320x240 @ 500, then 640x480 @ 1000) and
// checks that a frame at the final resolution can be encoded.
TEST_F(VideoTest, ReconfigureEncoder)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    // First configuration.
    ASSERT_EQ(vc_reconfigure_encoder(session, 500, 320, 240, -1), 0);

    // Second configuration: both bitrate and resolution differ from the first.
    const std::uint16_t width = 640;
    const std::uint16_t height = 480;
    ASSERT_EQ(vc_reconfigure_encoder(session, 1000, width, height, -1), 0);

    // Flat frame at the new size; chroma planes are half size in each dimension.
    std::vector<std::uint8_t> luma(width * height, 128);
    std::vector<std::uint8_t> chroma_u((width / 2) * (height / 2), 64);
    std::vector<std::uint8_t> chroma_v((width / 2) * (height / 2), 192);

    ASSERT_EQ(vc_encode(session, width, height, luma.data(), chroma_u.data(), chroma_v.data(),
                  VC_EFLAG_NONE),
        0);

    vc_kill(session);
}
// A freshly created session reports the default lcfd value.
TEST_F(VideoTest, GetLcfd)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    // Default lcfd is 60 in video.c
    const auto lcfd = vc_get_lcfd(session);
    EXPECT_EQ(lcfd, 60u);

    vc_kill(session);
}
// Sends a payload through an audio-typed RTP session towards a video session
// and checks that iterating the video session does not invoke the frame
// callback (the recorded width stays 0).
TEST_F(VideoTest, QueueInvalidMessage)
{
    VideoTestData received;
    VCSession *session
        = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &received);
    ASSERT_NE(session, nullptr);

    RtpMock mock;

    // The sender is an audio RTP session even though the target is a video session.
    RTPSession *audio_tx = rtp_new(log, RTP_TYPE_AUDIO, mono_time, RtpMock::send_packet, &mock,
        nullptr, nullptr, nullptr, session, RtpMock::video_cb);
    RTPSession *video_rx = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &mock,
        nullptr, nullptr, nullptr, session, RtpMock::video_cb);
    mock.recv_session = video_rx;

    std::vector<std::uint8_t> audio_payload(100, 0);
    const std::uint32_t payload_size = static_cast<std::uint32_t>(audio_payload.size());
    const int send_result
        = rtp_send_data(log, audio_tx, audio_payload.data(), payload_size, false);
    ASSERT_EQ(send_result, 0);

    // The payload type was wrong, so iterating must not trigger the callback.
    vc_iterate(session);
    EXPECT_EQ(received.width, 0u);

    rtp_kill(log, audio_tx);
    rtp_kill(log, video_rx);
    vc_kill(session);
}
// Exercises three reconfigure paths on a fresh session: identical settings,
// bitrate-only change, and an explicit kf_max_dist. Each must return 0.
TEST_F(VideoTest, ReconfigureOptimizations)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    const std::uint16_t width = 800;
    const std::uint16_t height = 600;

    // Step 1: same values as the initial configuration (should be a no-op).
    // Per the original note, vc_new initializes the encoder with 800x600 and 5000 bitrate.
    EXPECT_EQ(vc_reconfigure_encoder(session, 5000, width, height, -1), 0);

    // Step 2: only the bitrate changes.
    EXPECT_EQ(vc_reconfigure_encoder(session, 2000, width, height, -1), 0);

    // Step 3: kf_max_dist > 1 (triggers re-init and the kf_max_dist branch).
    EXPECT_EQ(vc_reconfigure_encoder(session, 2000, width, height, 60), 0);

    vc_kill(session);
}
// Covers lcfd bookkeeping and odd packet types on the receive path:
//  1. a packet arriving 50ms after session creation updates lcfd to 50,
//  2. a gap above the 100ms threshold leaves lcfd unchanged,
//  3. a packet sent with payload type RTP_TYPE_VIDEO + 2 is accepted,
//  4. the queue mutex accessor returns a non-null pointer.
// Every rtp_send_data result is checked with a non-fatal EXPECT so the
// cleanup at the end still runs if one of them fails.
TEST_F(VideoTest, LcfdAndSpecialPackets)
{
    VideoTestData data;
    VCSession *vc = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &data);
    ASSERT_NE(vc, nullptr);

    RtpMock rtp_mock;
    RTPSession *video_recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
        &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    rtp_mock.recv_session = video_recv_rtp;

    std::vector<std::uint8_t> dummy_frame(10, 0);
    const std::uint32_t dummy_size = static_cast<std::uint32_t>(dummy_frame.size());

    // 1. Test lcfd update
    tm.t += 50;  // Advance time by 50ms
    mono_time_update(mono_time);

    EXPECT_EQ(rtp_send_data(log, video_recv_rtp, dummy_frame.data(), dummy_size, true), 0);

    // lcfd should be updated. Initial linfts was set at vc_new (tm.t=1000).
    // Now tm.t is 1050. t_lcfd = 1050 - 1000 = 50.
    EXPECT_EQ(vc_get_lcfd(vc), 50u);

    // 2. Test lcfd threshold (t_lcfd > 100 should be ignored)
    tm.t += 200;
    mono_time_update(mono_time);
    EXPECT_EQ(rtp_send_data(log, video_recv_rtp, dummy_frame.data(), dummy_size, true), 0);
    EXPECT_EQ(vc_get_lcfd(vc), 50u);  // Should still be 50

    // 3. Test dummy packet PT = (RTP_TYPE_VIDEO + 2) % 128
    RTPSession *dummy_rtp = rtp_new(log, (RTP_TYPE_VIDEO + 2), mono_time, RtpMock::send_packet,
        &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
    rtp_mock.recv_session = dummy_rtp;
    // Should return 0 but do nothing (logged as "Got dummy!"); the return value
    // is the only part of that expectation observable here, so assert it.
    EXPECT_EQ(rtp_send_data(log, dummy_rtp, dummy_frame.data(), dummy_size, false), 0);

    // 4. Test GetQueueMutex
    EXPECT_NE(vc_get_queue_mutex(vc), nullptr);

    rtp_kill(log, video_recv_rtp);
    rtp_kill(log, dummy_rtp);
    vc_kill(vc);
}
// Alternates reconfigure and encode five times, growing the frame by 16 pixels
// in each dimension per step (160x120 up to 224x184).
TEST_F(VideoTest, MultiReconfigureEncode)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    for (int step = 0; step < 5; ++step) {
        const std::uint16_t frame_w = static_cast<std::uint16_t>(160 + (step * 16));
        const std::uint16_t frame_h = static_cast<std::uint16_t>(120 + (step * 16));

        // Plane sizes are computed in size_t; chroma is half size in each dimension.
        const std::size_t luma_size = static_cast<std::size_t>(frame_w) * frame_h;
        const std::size_t chroma_size = (static_cast<std::size_t>(frame_w) / 2) * (frame_h / 2);

        std::vector<std::uint8_t> luma(luma_size, 128);
        std::vector<std::uint8_t> chroma_u(chroma_size, 64);
        std::vector<std::uint8_t> chroma_v(chroma_size, 192);

        ASSERT_EQ(vc_reconfigure_encoder(session, 1000, frame_w, frame_h, -1), 0);
        ASSERT_EQ(vc_encode(session, frame_w, frame_h, luma.data(), chroma_u.data(),
                      chroma_v.data(), VC_EFLAG_NONE),
            0);
    }

    vc_kill(session);
}
// Regression scenario: a reconfigure with an invalid 0x0 resolution must fail
// with -1, and a following vc_encode call must be survivable. The test passes
// as long as the process does not crash; the result of vc_encode itself is
// not inspected.
TEST_F(VideoTest, ReconfigureFailDoS)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    // Force the failure path with a zero resolution.
    // Original author's note: this destroys the encoder in the unfixed code.
    const int reconfigure_result = vc_reconfigure_encoder(session, 1000, 0, 0, -1);
    ASSERT_EQ(reconfigure_result, -1);

    // Encode a normal 320x240 frame afterwards.
    // Original author's note: this call crashes in the unfixed code because
    // vc->encoder has been destroyed.
    const std::uint16_t width = 320;
    const std::uint16_t height = 240;
    std::vector<std::uint8_t> luma(width * height, 128);
    std::vector<std::uint8_t> chroma_u((width / 2) * (height / 2), 64);
    std::vector<std::uint8_t> chroma_v((width / 2) * (height / 2), 192);

    vc_encode(session, width, height, luma.data(), chroma_u.data(), chroma_v.data(),
        VC_EFLAG_NONE);

    vc_kill(session);
}
// Feeds the receiver a hand-built RTP packet whose header claims a full frame
// length of 1000 bytes while only 10 payload bytes are present, then iterates
// the video session. The test has no assertions after setup; it passes if the
// process survives (e.g. under a sanitizer).
TEST_F(VideoTest, LyingLengthOOB)
{
    VideoTestData sink;
    VCSession *session = vc_new(mem, log, mono_time, 123, VideoTestData::receive_frame, &sink);
    ASSERT_NE(session, nullptr);

    RtpMock mock;
    RTPSession *rx = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &mock, nullptr,
        nullptr, nullptr, session, RtpMock::video_cb);
    mock.recv_session = rx;

    // Big-endian field writers.
    auto put16 = [](std::uint8_t *dst, std::uint16_t value) {
        dst[0] = static_cast<std::uint8_t>(value >> 8);
        dst[1] = static_cast<std::uint8_t>(value & 0xff);
    };
    auto put32 = [](std::uint8_t *dst, std::uint32_t value) {
        for (int byte = 0; byte < 4; ++byte) {
            dst[byte] = static_cast<std::uint8_t>((value >> (8 * (3 - byte))) & 0xff);
        }
    };
    auto put64 = [&put32](std::uint8_t *dst, std::uint64_t value) {
        put32(dst, static_cast<std::uint32_t>(value >> 32));
        put32(dst + 4, static_cast<std::uint32_t>(value & 0xffffffff));
    };

    // Layout: 1 packet-id byte, the RTP header, then 10 payload bytes, all
    // zero-initialised.
    const std::uint16_t payload_len = 10;
    std::uint8_t packet[RTP_HEADER_SIZE + 11] = {};

    // Packet id (called "Tox ID" in the original comments).
    packet[0] = static_cast<std::uint8_t>(RTP_TYPE_VIDEO);

    // The RTP header begins right after the id byte; offsets below are
    // relative to the start of the header.
    std::uint8_t *const header = packet + 1;
    header[0] = 2 << 6;  // ve = 2
    header[1] = static_cast<std::uint8_t>(RTP_TYPE_VIDEO % 128);
    put16(header + 2, 1);  // sequnum
    put32(header + 4, 1000);  // timestamp
    put32(header + 8, 0x12345678);  // ssrc
    put64(header + 12, RTP_LARGE_FRAME);  // flags
    put32(header + 20, 0);  // offset_full
    put32(header + 24, 1000);  // data_length_full (LYING! Actual is 10)
    put32(header + 28, 0);  // received_length_full
    // 11 padding fields of 4 bytes each (44 bytes) stay zero.
    put16(header + 76, 0);  // offset_lower
    put16(header + 78, payload_len);  // data_length_lower

    // Deliver the malicious packet straight to the receiving session.
    rtp_receive_packet(rx, packet, sizeof(packet));

    // Original author's note: iterating calls vpx_codec_decode with length 1000,
    // which is expected to read out of bounds in unfixed code.
    vc_iterate(session);

    rtp_kill(log, rx);
    vc_kill(session);
}
} // namespace