/* SPDX-License-Identifier: GPL-3.0-or-later * Copyright © 2025 The TokTok team. */ #include #include #include "../toxcore/logger.h" #include "../toxcore/mono_time.h" #include "../toxcore/os_memory.h" #include "av_test_support.hh" #include "rtp.h" #include "video.h" namespace { class VideoBench : public benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) override { const Memory *mem = os_memory(); log = logger_new(mem); tm.t = 1000; mono_time = mono_time_new(mem, mock_time_cb, &tm); vc = vc_new(log, mono_time, 123, nullptr, nullptr); width = static_cast(state.range(0)); height = static_cast(state.range(1)); // Use a standard bitrate for benchmarks vc_reconfigure_encoder(vc, 2000, width, height, -1); y.resize(static_cast(width) * height); u.resize((static_cast(width) / 2) * (static_cast(height) / 2)); v.resize((static_cast(width) / 2) * (static_cast(height) / 2)); rtp_mock.capture_packets = false; // Disable capturing for benchmarks rtp_mock.auto_forward = true; rtp_mock.recv_session = rtp_new(log, RTP_TYPE_VIDEO, mono_time, RtpMock::send_packet, &rtp_mock, nullptr, nullptr, nullptr, vc, RtpMock::video_cb); } void TearDown(const ::benchmark::State &state) override { const Memory *mem = os_memory(); if (rtp_mock.recv_session) { rtp_kill(log, rtp_mock.recv_session); } if (vc) { vc_kill(vc); } if (mono_time) { mono_time_free(mem, mono_time); } if (log) { logger_kill(log); } } Logger *log = nullptr; Mono_Time *mono_time = nullptr; MockTime tm; VCSession *vc = nullptr; RtpMock rtp_mock; uint16_t width = 0, height = 0; std::vector y, u, v; }; // Benchmark encoding a sequence of frames. // Measures how the encoder performs as it builds up temporal state. BENCHMARK_DEFINE_F(VideoBench, EncodeSequence)(benchmark::State &state) { int frame_index = 0; // Pre-fill frames to avoid measuring fill_frame time const int num_prefilled = 100; std::vector> ys(num_prefilled, std::vector(width * height)); std::vector> us( num_prefilled, std::vector((width / 2) * (height / 2))); std::vector> vs( num_prefilled, std::vector((width / 2) * (height / 2))); for (int i = 0; i < num_prefilled; ++i) { fill_video_frame(width, height, i, ys[i], us[i], vs[i]); } for (auto _ : state) { int idx = frame_index % num_prefilled; // Force a keyframe every 100 frames to simulate real-world periodic keyframes int flags = (frame_index % 100 == 0) ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE; vc_encode(vc, width, height, ys[idx].data(), us[idx].data(), vs[idx].data(), flags); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { benchmark::DoNotOptimize(pkt_data); benchmark::DoNotOptimize(pkt_size); } frame_index++; } } BENCHMARK_REGISTER_F(VideoBench, EncodeSequence) ->Args({320, 240}) ->Args({640, 480}) ->Args({1280, 720}) ->Args({1920, 1080}); // Benchmark decoding a sequence of frames. // First pre-encodes a sequence, then measures decoding performance. BENCHMARK_DEFINE_F(VideoBench, DecodeSequence)(benchmark::State &state) { const int num_frames = 100; std::vector> encoded_frames(num_frames); std::vector is_keyframe_list(num_frames); // Pre-encode for (int i = 0; i < num_frames; ++i) { fill_video_frame(width, height, i, y, u, v); int flags = (i == 0) ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE; vc_encode(vc, width, height, y.data(), u.data(), v.data(), flags); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_kf; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_kf)) { encoded_frames[i].insert(encoded_frames[i].end(), pkt_data, pkt_data + pkt_size); is_keyframe_list[i] = is_kf; } } int frame_index = 0; for (auto _ : state) { int idx = frame_index % num_frames; const auto &encoded_data = encoded_frames[idx]; rtp_send_data(log, rtp_mock.recv_session, encoded_data.data(), static_cast(encoded_data.size()), is_keyframe_list[idx]); vc_iterate(vc); frame_index++; } } BENCHMARK_REGISTER_F(VideoBench, DecodeSequence) ->Args({320, 240}) ->Args({640, 480}) ->Args({1280, 720}) ->Args({1920, 1080}); // Full end-to-end sequence benchmark (Encode -> RTP -> Decode) BENCHMARK_DEFINE_F(VideoBench, FullSequence)(benchmark::State &state) { int frame_index = 0; const int num_prefilled = 100; std::vector> ys(num_prefilled, std::vector(width * height)); std::vector> us( num_prefilled, std::vector((width / 2) * (height / 2))); std::vector> vs( num_prefilled, std::vector((width / 2) * (height / 2))); for (int i = 0; i < num_prefilled; ++i) { fill_video_frame(width, height, i, ys[i], us[i], vs[i]); } for (auto _ : state) { int idx = frame_index % num_prefilled; int flags = (frame_index % 100 == 0) ? VC_EFLAG_FORCE_KF : VC_EFLAG_NONE; vc_encode(vc, width, height, ys[idx].data(), us[idx].data(), vs[idx].data(), flags); vc_increment_frame_counter(vc); uint8_t *pkt_data; uint32_t pkt_size; bool is_keyframe = false; // We need to collect all packets for the frame before sending to decoder std::vector frame_data; while (vc_get_cx_data(vc, &pkt_data, &pkt_size, &is_keyframe)) { frame_data.insert(frame_data.end(), pkt_data, pkt_data + pkt_size); } rtp_send_data(log, rtp_mock.recv_session, frame_data.data(), static_cast(frame_data.size()), is_keyframe); vc_iterate(vc); frame_index++; } } BENCHMARK_REGISTER_F(VideoBench, FullSequence) ->Args({320, 240}) ->Args({640, 480}) ->Args({1280, 720}) ->Args({1920, 1080}); } BENCHMARK_MAIN();