Files
tomato-testing/toxav/video_test.cc
Green Sky 565efa4f39 Squashed 'external/toxcore/c-toxcore/' changes from 1828c5356..c9cdae001
c9cdae001 fix(toxav): remove extra copy of video frame on encode
4f6d4546b test: Improve the fake network library.
a2581e700 refactor(toxcore): generate `Friend_Request` and `Dht_Nodes_Response`
2aaa11770 refactor(toxcore): use Tox_Memory in generated events
5c367452b test(toxcore): fix incorrect mutex in tox_scenario_get_time
8f92e710f perf: Add a timed limit of number of cookie requests.
695b6417a test: Add some more simulated network support.
815ae9ce9 test(toxcore): fix thread-safety in scenario framework
6d85c754e test(toxcore): add unit tests for net_crypto
9c22e79cc test(support): add SimulatedEnvironment for deterministic testing
f34fcb195 chore: Update windows Dockerfile to debian stable (trixie).
ece0e8980 fix(group_moderation): allow validating unsorted sanction list signatures
a4fa754d7 refactor: rename struct Packet to struct Net_Packet
d6f330f85 cleanup: Fix some warnings from coverity.
e206bffa2 fix(group_chats): fix sync packets reverting topics
0e4715598 test: Add new scenario testing framework.
668291f44 refactor(toxcore): decouple Network_Funcs from sockaddr via IP_Port
fc4396cef fix: potential division by zero in toxav and unsafe hex parsing
8e8b352ab refactor: Add nullable annotations to struct members.
7740bb421 refactor: decouple net_crypto from DHT
1936d4296 test: add benchmark for toxav audio and video
46bfdc2df fix: correct printf format specifiers for unsigned integers
REVERT: 1828c5356 fix(toxav): remove extra copy of video frame on encode

git-subtree-dir: external/toxcore/c-toxcore
git-subtree-split: c9cdae001341e701fca980c9bb9febfeb95d2902
2026-01-11 14:42:31 +01:00

493 lines
16 KiB
C++

#include "video.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <cmath>
#include <vector>
#include "../toxcore/logger.h"
#include "../toxcore/mono_time.h"
#include "../toxcore/network.h"
#include "../toxcore/os_memory.h"
#include "av_test_support.hh"
#include "rtp.h"
namespace {
using VideoTest = AvTest;
TEST_F(VideoTest, BasicNewKill)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
vc_kill(vc);
}
TEST_F(VideoTest, EncodeDecodeLoop)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = recv_rtp;
uint16_t width = 320;
uint16_t height = 240;
uint32_t bitrate = 500;
ASSERT_EQ(vc_reconfigure_encoder(vc, bitrate, width, height, -1), 0);
std::vector<uint8_t> y(width * height, 128);
std::vector<uint8_t> u((width / 2) * (height / 2), 64);
std::vector<uint8_t> v((width / 2) * (height / 2), 192);
ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(), VC_EFLAG_FORCE_KF), 0);
vc_increment_frame_counter(vc);
uint8_t *pkt_data;
uint32_t pkt_size;
bool is_keyframe;
while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
int rc = rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe);
ASSERT_EQ(rc, 0);
}
vc_iterate(vc);
ASSERT_EQ(data.friend_number, 123u);
ASSERT_EQ(data.width, width);
ASSERT_EQ(data.height, height);
ASSERT_FALSE(data.y.empty());
rtp_kill(log, send_rtp);
rtp_kill(log, recv_rtp);
vc_kill(vc);
}
TEST_F(VideoTest, EncodeDecodeSequence)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = recv_rtp;
uint16_t width = 320;
uint16_t height = 240;
uint32_t bitrate = 2000;
ASSERT_EQ(vc_reconfigure_encoder(vc, bitrate, width, height, -1), 0);
for (int i = 0; i < 20; ++i) {
std::vector<uint8_t> y(width * height);
std::vector<uint8_t> u((width / 2) * (height / 2));
std::vector<uint8_t> v((width / 2) * (height / 2));
// Background
std::fill(y.begin(), y.end(), 16);
std::fill(u.begin(), u.end(), 128);
std::fill(v.begin(), v.end(), 128);
// Moving square
int sq_size = 64;
int x0 = (i * 8) % (width - sq_size);
int y0 = (i * 4) % (height - sq_size);
for (int r = 0; r < sq_size; ++r) {
for (int c = 0; c < sq_size; ++c) {
y[(y0 + r) * width + (x0 + c)] = 200;
}
}
ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
0);
vc_increment_frame_counter(vc);
uint8_t *pkt_data;
uint32_t pkt_size;
bool is_keyframe;
while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe);
}
vc_iterate(vc);
ASSERT_EQ(data.width, width);
ASSERT_EQ(data.height, height);
double mse = data.calculate_mse(y);
// Expect MSE to be reasonably low for high bitrate
EXPECT_LT(mse, 100.0) << "Frame " << i << " MSE too high: " << mse;
}
rtp_kill(log, send_rtp);
rtp_kill(log, recv_rtp);
vc_kill(vc);
}
TEST_F(VideoTest, EncodeDecodeResolutionChange)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = recv_rtp;
uint16_t widths[] = {320, 160, 480};
uint16_t heights[] = {240, 120, 360};
for (int res = 0; res < 3; ++res) {
uint16_t width = widths[res];
uint16_t height = heights[res];
ASSERT_EQ(vc_reconfigure_encoder(vc, 2000, width, height, -1), 0);
for (int i = 0; i < 5; ++i) {
std::vector<uint8_t> y(width * height);
std::vector<uint8_t> u((width / 2) * (height / 2));
std::vector<uint8_t> v((width / 2) * (height / 2));
std::fill(y.begin(), y.end(), static_cast<uint8_t>((res * 50 + i * 10) % 256));
std::fill(u.begin(), u.end(), 128);
std::fill(v.begin(), v.end(), 128);
ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
0);
vc_increment_frame_counter(vc);
uint8_t *pkt_data;
uint32_t pkt_size;
bool is_keyframe;
while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe);
}
vc_iterate(vc);
ASSERT_EQ(data.width, width);
ASSERT_EQ(data.height, height);
double mse = data.calculate_mse(y);
EXPECT_LT(mse, 100.0) << "Res " << res << " Frame " << i << " MSE too high: " << mse;
}
}
rtp_kill(log, send_rtp);
rtp_kill(log, recv_rtp);
vc_kill(vc);
}
TEST_F(VideoTest, EncodeDecodeBitrateImpact)
{
uint32_t bitrates[] = {100, 500, 2000};
double mses[3];
uint16_t width = 320;
uint16_t height = 240;
for (int b = 0; b < 3; ++b) {
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *send_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
&rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
&rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = recv_rtp;
ASSERT_EQ(vc_reconfigure_encoder(vc, bitrates[b], width, height, -1), 0);
double total_mse = 0;
int frames = 10;
for (int i = 0; i < frames; ++i) {
std::vector<uint8_t> y(width * height);
std::vector<uint8_t> u((width / 2) * (height / 2));
std::vector<uint8_t> v((width / 2) * (height / 2));
for (size_t j = 0; j < y.size(); ++j)
y[j] = static_cast<uint8_t>((j + i * 10) % 256);
std::fill(u.begin(), u.end(), 128);
std::fill(v.begin(), v.end(), 128);
ASSERT_EQ(vc_encode(vc, width, height, y.data(), u.data(), v.data(),
i == 0 ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE),
0);
vc_increment_frame_counter(vc);
uint8_t *pkt_data;
uint32_t pkt_size;
bool is_keyframe;
while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) {
rtp_send_data(log, send_rtp, pkt_data, pkt_size, is_keyframe);
}
vc_iterate(vc);
total_mse += data.calculate_mse(y);
}
mses[b] = total_mse / frames;
rtp_kill(log, send_rtp);
rtp_kill(log, recv_rtp);
vc_kill(vc);
}
// Quality should generally improve (MSE decrease) as bitrate increases.
// 100kbps should have significantly higher MSE than 2000kbps for this complex pattern.
EXPECT_GT(mses[0], mses[1]);
EXPECT_GT(mses[1], mses[2]);
printf("MSE results: 100kbps: %f, 500kbps: %f, 2000kbps: %f\n", mses[0], mses[1], mses[2]);
}
TEST_F(VideoTest, ReconfigureEncoder)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
// Initial reconfigure
ASSERT_EQ(vc_reconfigure_encoder(vc, 500, 320, 240, -1), 0);
// Change bitrate and resolution
ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, 640, 480, -1), 0);
std::vector<uint8_t> y(640 * 480, 128);
std::vector<uint8_t> u(320 * 240, 64);
std::vector<uint8_t> v(320 * 240, 192);
ASSERT_EQ(vc_encode(vc, 640, 480, y.data(), u.data(), v.data(), VC_EFLAG_NONE), 0);
vc_kill(vc);
}
TEST_F(VideoTest, GetLcfd)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
// Default lcfd is 60 in video.c
EXPECT_EQ(vc_get_lcfd(vc), 60u);
vc_kill(vc);
}
TEST_F(VideoTest, QueueInvalidMessage)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
// Create an audio RTP session but try to queue to video session
RTPSession *audio_rtp = rtp_new(log, RTP_TYPE_AUDIO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
RTPSession *video_recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
&rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = video_recv_rtp;
std::vector<uint8_t> dummy_audio(100, 0);
int rc = rtp_send_data(
log, audio_rtp, dummy_audio.data(), static_cast<uint32_t>(dummy_audio.size()), false);
ASSERT_EQ(rc, 0);
// Iterate should NOT trigger callback because payload type was wrong
vc_iterate(vc);
EXPECT_EQ(data.width, 0u);
rtp_kill(log, audio_rtp);
rtp_kill(log, video_recv_rtp);
vc_kill(vc);
}
TEST_F(VideoTest, ReconfigureOptimizations)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
// 1. Reconfigure with same values (should do nothing)
// vc_new initializes encoder with 800x600 and 5000 bitrate.
EXPECT_EQ(vc_reconfigure_encoder(vc, 5000, 800, 600, -1), 0);
// 2. Reconfigure with only bitrate change
EXPECT_EQ(vc_reconfigure_encoder(vc, 2000, 800, 600, -1), 0);
// 3. Reconfigure with kf_max_dist > 1 (triggers re-init and kf_max_dist branch)
EXPECT_EQ(vc_reconfigure_encoder(vc, 2000, 800, 600, 60), 0);
vc_kill(vc);
}
TEST_F(VideoTest, LcfdAndSpecialPackets)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *video_recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet,
&rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = video_recv_rtp;
// 1. Test lcfd update
tm.t += 50; // Advance time by 50ms
mono_time_update(mono_time);
std::vector<uint8_t> dummy_frame(10, 0);
rtp_send_data(
log, video_recv_rtp, dummy_frame.data(), static_cast<uint32_t>(dummy_frame.size()), true);
// lcfd should be updated. Initial linfts was set at vc_new (tm.t=1000).
// Now tm.t is 1050. t_lcfd = 1050 - 1000 = 50.
EXPECT_EQ(vc_get_lcfd(vc), 50u);
// 2. Test lcfd threshold (t_lcfd > 100 should be ignored)
tm.t += 200;
mono_time_update(mono_time);
rtp_send_data(
log, video_recv_rtp, dummy_frame.data(), static_cast<uint32_t>(dummy_frame.size()), true);
EXPECT_EQ(vc_get_lcfd(vc), 50u); // Should still be 50
// 3. Test dummy packet PT = (RTP_TYPE_VIDEO + 2) % 128
RTPSession *dummy_rtp = rtp_new(log, (RTP_TYPE_VIDEO + 2), mono_time, RtpMock::send_packet,
&rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = dummy_rtp;
rtp_send_data(
log, dummy_rtp, dummy_frame.data(), static_cast<uint32_t>(dummy_frame.size()), false);
// Should return 0 but do nothing (logged as "Got dummy!")
// 4. Test GetQueueMutex
EXPECT_NE(vc_get_queue_mutex(vc), nullptr);
rtp_kill(log, video_recv_rtp);
rtp_kill(log, dummy_rtp);
vc_kill(vc);
}
TEST_F(VideoTest, MultiReconfigureEncode)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
for (int i = 0; i < 5; ++i) {
uint16_t w = static_cast<uint16_t>(160 + (i * 16));
uint16_t h = static_cast<uint16_t>(120 + (i * 16));
std::vector<uint8_t> y(static_cast<size_t>(w) * h, 128);
std::vector<uint8_t> u((static_cast<size_t>(w) / 2) * (h / 2), 64);
std::vector<uint8_t> v((static_cast<size_t>(w) / 2) * (h / 2), 192);
ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, w, h, -1), 0);
ASSERT_EQ(vc_encode(vc, w, h, y.data(), u.data(), v.data(), VC_EFLAG_NONE), 0);
}
vc_kill(vc);
}
TEST_F(VideoTest, ReconfigureFailDoS)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
// Trigger failure by passing invalid resolution (0)
// This currently destroys the encoder.
ASSERT_EQ(vc_reconfigure_encoder(vc, 1000, 0, 0, -1), -1);
// Attempt to encode. This is expected to crash because vc->encoder is destroyed.
std::vector<uint8_t> y(320 * 240, 128);
std::vector<uint8_t> u(160 * 120, 64);
std::vector<uint8_t> v(160 * 120, 192);
// This call will crash in the current unfixed code.
vc_encode(vc, 320, 240, y.data(), u.data(), v.data(), VC_EFLAG_NONE);
vc_kill(vc);
}
TEST_F(VideoTest, LyingLengthOOB)
{
VideoTestData data;
VCSession *vc = vc_new(log, mono_time, 123, VideoTestData::receive_frame, &data);
ASSERT_NE(vc, nullptr);
RtpMock rtp_mock;
RTPSession *recv_rtp = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock,
nullptr, nullptr, nullptr, vc, RtpMock::video_cb);
rtp_mock.recv_session = recv_rtp;
// Craft a malicious RTP packet
uint16_t payload_len = 10;
uint8_t packet[RTP_HEADER_SIZE + 11]; // +1 for Tox ID
memset(packet, 0, sizeof(packet));
// Tox ID
packet[0] = static_cast<uint8_t>(RTP_TYPE_VIDEO);
auto pack_u16 = [](uint8_t *p, uint16_t v) {
p[0] = static_cast<uint8_t>(v >> 8);
p[1] = static_cast<uint8_t>(v & 0xff);
};
auto pack_u32 = [](uint8_t *p, uint32_t v) {
p[0] = static_cast<uint8_t>(v >> 24);
p[1] = static_cast<uint8_t>((v >> 16) & 0xff);
p[2] = static_cast<uint8_t>((v >> 8) & 0xff);
p[3] = static_cast<uint8_t>(v & 0xff);
};
auto pack_u64 = [&](uint8_t *p, uint64_t v) {
pack_u32(p, static_cast<uint32_t>(v >> 32));
pack_u32(p + 4, static_cast<uint32_t>(v & 0xffffffff));
};
// RTP Header starts at packet[1]
packet[1] = 2 << 6; // ve = 2
packet[2] = static_cast<uint8_t>(RTP_TYPE_VIDEO % 128);
pack_u16(packet + 3, 1); // sequnum
pack_u32(packet + 5, 1000); // timestamp
pack_u32(packet + 9, 0x12345678); // ssrc
pack_u64(packet + 13, RTP_LARGE_FRAME); // flags
pack_u32(packet + 21, 0); // offset_full
pack_u32(packet + 25, 1000); // data_length_full (LYING! Actual is 10)
pack_u32(packet + 29, 0); // received_length_full
// Skip padding fields (11 * 4 = 44 bytes)
pack_u16(packet + 77, 0); // offset_lower
pack_u16(packet + 79, payload_len); // data_length_lower
// Send the malicious packet
rtp_receive_packet(recv_rtp, packet, sizeof(packet));
// Trigger vc_iterate. This will call vpx_codec_decode with length 1000.
// This is expected to cause OOB read.
vc_iterate(vc);
rtp_kill(log, recv_rtp);
vc_kill(vc);
}
} // namespace