NOTE(review): this patch was rebuilt from a whitespace-collapsed extraction.
Text inside <...> (header names, template arguments) had been stripped; it is
restored from usage where the patch itself makes it unambiguous. The bare
"#include" context lines and the indentation of the ngc_ft1.cpp hunks could
not be recovered and have to be re-synced with the target file before applying.

diff --git a/ledbat.cpp b/ledbat.cpp
new file mode 100644
index 0000000..f0f9e3b
--- /dev/null
+++ b/ledbat.cpp
@@ -0,0 +1,285 @@
+#include "./ledbat.hpp"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <deque>
+#include <tuple>
+
+#include <iomanip>
+#include <iostream>
+#include <limits>
+
+// Records the construction time; getTimeNow() reports seconds relative to it.
+LEDBAT::LEDBAT(void) {
+	_time_start_offset = clock::now();
+
+	{ // add some high delay values
+		// spec want +inf
+		//_rtt_buffer.push_back(_base_delay);
+		//_rtt_buffer.push_back(_base_delay);
+		//_rtt_buffer.push_back(_base_delay);
+	}
+}
+
+// Returns how many payload bytes may be sent right now: a multiple of 496
+// (the payload carried by one segment), or 0 if cwnd or fwnd is exhausted.
+// The free space is rounded UP to whole segments, so the result may exceed
+// the remaining window by less than one segment.
+size_t LEDBAT::canSend(void) const {
+	if (_in_flight.empty()) {
+		return 496u;
+	}
+
+	//const float time_since_last_sent {std::min(
+		//getTimeNow() - std::get<1>(_in_flight.back()),
+		//0.01f // 10ms max
+	//)};
+
+	//const float bps {std::min(
+		//(_cwnd / getCurrentDelay()),
+		//max_byterate_allowed
+	//)};
+
+	const int64_t cspace = _cwnd - _in_flight_bytes;
+	if (cspace < 496) {
+		return 0u;
+	}
+
+	const int64_t fspace = _fwnd - _in_flight_bytes;
+	if (fspace < 496) {
+		return 0u;
+	}
+
+	size_t space = std::ceil(std::min(cspace, fspace) / 496.f) * 496.f;
+
+	// data size, no overhead
+	//const int64_t can_send_size {std::min(
+		//bps * time_since_last_sent - segment_overhead,
+		//maximum_segment_size - segment_overhead
+	//)};
+	//const int64_t can_send_size {static_cast<int64_t>(bps * time_since_last_sent - segment_overhead)};
+
+	//if (can_send_size < 100) {
+		//return 0;
+	//} else {
+		//return can_send_size;
+	//}
+
+	return space;
+}
+
+// Registers a freshly sent segment. data_size is the payload size only;
+// segment_overhead is added so that all accounting includes the overhead.
+void LEDBAT::onSent(SeqIDType seq, size_t data_size) {
+	if (true) {
+		for (const auto& it : _in_flight) {
+			assert(std::get<0>(it) != seq);
+		}
+	}
+	_in_flight.push_back({seq, getTimeNow(), data_size + segment_overhead});
+	_in_flight_bytes += data_size + segment_overhead;
+	_recently_sent_bytes += data_size + segment_overhead;
+}
+
+// Handles a batch of acknowledged sequence ids. Every id that is still in
+// flight yields one RTT sample and is removed from the in-flight accounting;
+// unknown ids are skipped. The windows are re-evaluated only if at least one
+// id matched.
+void LEDBAT::onAck(std::vector<SeqIDType> seqs) {
+	// latest send time among the matched ids, -inf means "nothing matched"
+	float most_recent {-std::numeric_limits<float>::infinity()};
+
+	const auto now {getTimeNow()};
+
+	for (const auto& seq : seqs) {
+		auto it = std::find_if(_in_flight.begin(), _in_flight.end(), [seq](const auto& v) -> bool {
+			return std::get<0>(v) == seq;
+		});
+
+		if (it == _in_flight.end()) {
+			continue; // not found, ignore
+		}
+
+		addRTT(now - std::get<1>(*it));
+
+		// TODO: remove
+		most_recent = std::max(most_recent, std::get<1>(*it));
+		_in_flight_bytes -= std::get<2>(*it);
+		_recently_acked_data += std::get<2>(*it);
+		assert(_in_flight_bytes >= 0);
+		_in_flight.erase(it);
+	}
+
+	if (most_recent == -std::numeric_limits<float>::infinity()) {
+		return; // not found, ignore
+	}
+
+	//addRTT(now - most_recent);
+
+	updateWindows();
+
+	// update cto - no? we dont handle timeouts
+}
+
+// Reports a segment as lost; the next window update will halve cwnd.
+// With discard set the segment will not be retransmitted, so it is dropped
+// from the in-flight accounting entirely. Unknown ids are ignored.
+void LEDBAT::onLoss(SeqIDType seq, bool discard) {
+	auto it = std::find_if(_in_flight.begin(), _in_flight.end(), [seq](const auto& v) -> bool {
+		return std::get<0>(v) == seq;
+	});
+
+	if (it == _in_flight.end()) {
+		// error
+		return; // not found, ignore ??
+	}
+
+	_recently_lost_data = true;
+
+	// at most once per rtt?
+
+	if (false) {
+		std::cerr << "CCA: onLoss: TIME: " << getTimeNow() << "\n";
+	}
+
+	// TODO: "if data lost is not to be retransmitted"
+	if (discard) {
+		_in_flight_bytes -= std::get<2>(*it);
+		assert(_in_flight_bytes >= 0);
+		// forget the entry too: otherwise a late ack would subtract its bytes
+		// a second time and the stale entry would never be removed
+		_in_flight.erase(it);
+	}
+
+	updateWindows();
+}
+
+// Mean of the samples in the current-delay filter window, in seconds;
+// +inf while no sample has been recorded.
+float LEDBAT::getCurrentDelay(void) const {
+	if (_tmp_rtt_buffer.empty()) {
+		return std::numeric_limits<float>::infinity();
+	}
+
+	float sum {0.f};
+	for (const float delay : _tmp_rtt_buffer) {
+		sum += delay;
+	}
+
+	return sum / _tmp_rtt_buffer.size();
+}
+
+// Feeds one RTT sample (seconds) into the current-delay filter and into the
+// base-delay history, which is stored as one minimum per 30 second section.
+void LEDBAT::addRTT(float new_delay) {
+	auto now = getTimeNow();
+
+	_base_delay = std::min(_base_delay, new_delay);
+	// TODO: use fixed size instead? allocations can ruin perf
+	_rtt_buffer.push_back({now, new_delay});
+
+	_tmp_rtt_buffer.push_front(new_delay);
+	// HACKY
+	if (_tmp_rtt_buffer.size() > current_delay_filter_window) {
+		_tmp_rtt_buffer.resize(current_delay_filter_window);
+	}
+
+	// close the section once it spans 30 seconds (at most 20 are kept, ~10min)
+	if (now - _rtt_buffer.front().first >= 30.f) {
+
+		float new_section_minimum = new_delay;
+		for (const auto& it : _rtt_buffer) {
+			new_section_minimum = std::min(it.second, new_section_minimum);
+		}
+
+		_rtt_buffer_minutes.push_back(new_section_minimum);
+
+		_rtt_buffer.clear();
+
+		if (_rtt_buffer_minutes.size() > 20) {
+			_rtt_buffer_minutes.pop_front();
+		}
+
+		_base_delay = std::numeric_limits<float>::infinity();
+		for (const float it : _rtt_buffer_minutes) {
+			_base_delay = std::min(_base_delay, it);
+		}
+	}
+}
+
+// Re-evaluates cwnd and fwnd; does nothing unless at least one current
+// delay has passed since the previous evaluation.
+void LEDBAT::updateWindows(void) {
+	const auto now {getTimeNow()};
+
+	const float current_delay {getCurrentDelay()};
+
+	if (now - _last_cwnd >= current_delay) {
+		const float queuing_delay {current_delay - _base_delay};
+
+		_fwnd = max_byterate_allowed * current_delay;
+		_fwnd *= 1.3f; // try do balance conservative algo a bit, current_delay
+
+		//const float gain {1}; // TODO: move and increase
+		float gain {1.f / std::min(16.f, std::ceil(2.f*target_delay/_base_delay))};
+		//gain *= 400.f; // from packets to bytes ~
+		gain *= _recently_acked_data/10.f; // from packets to bytes ~
+		//gain *= 0.1f;
+
+		if (_recently_lost_data) {
+			// halve, but not below 2 segments and never upwards; std::clamp
+			// would be undefined here once cwnd is below that floor
+			_cwnd = std::max(
+				_cwnd / 2.f,
+				std::min(_cwnd, 2.f * maximum_segment_size)
+			);
+		} else {
+			// LEDBAT++ (the Rethinking the LEDBAT Protocol paper)
+			// "Multiplicative decrease"
+			const float constant {2.f}; // spec recs 1
+			if (queuing_delay < target_delay) {
+				// additive increase, applied exactly once, capped by fwnd
+				_cwnd = std::min(
+					_cwnd + gain,
+					_fwnd
+				);
+			} else if (queuing_delay > target_delay) {
+				const float decreased_cwnd {_cwnd + std::max( // TODO: where to put bytes_newly_acked
+					gain - constant * _cwnd * (queuing_delay / target_delay - 1.f),
+					-_cwnd/2.f // at most halve
+				)};
+
+				// max(min()) instead of std::clamp: on very short delays the
+				// rate cap can fall below the floor, which std::clamp forbids
+				_cwnd = std::max(
+					// never drop below 2 "packets" in flight
+					//2.f * maximum_segment_size,
+					2.f * 496,
+
+					std::min(
+						decreased_cwnd,
+						current_delay * max_byterate_allowed // cap rate
+					)
+				);
+			} // no else, we on point. very unlikely with float
+		}
+
+		if (false) { // plotting
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " cwnd: " << _cwnd << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " fwnd: " << _fwnd << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " current_delay: " << current_delay << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " base_delay: " << _base_delay << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " gain: " << gain << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " speed: " << (_recently_sent_bytes / (now - _last_cwnd)) / (1024*1024) << "\n";
+			std::cerr << std::fixed << "CCA: onAck: TIME: " << now << " in_flight_bytes: " << _in_flight_bytes << "\n";
+		}
+
+		_last_cwnd = now;
+		_recently_acked_data = 0;
+		_recently_lost_data = false;
+		_recently_sent_bytes = 0;
+	}
+}
+
diff --git a/ledbat.hpp b/ledbat.hpp
new file mode 100644
index 0000000..e84189b
--- /dev/null
+++ b/ledbat.hpp
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+// LEDBAT: https://www.rfc-editor.org/rfc/rfc6817
+// LEDBAT++: https://www.ietf.org/archive/id/draft-irtf-iccrg-ledbat-plus-plus-01.txt
+
+// LEDBAT++ implementation
+struct LEDBAT {
+	public: // config
+		using SeqIDType = std::pair<uint8_t, uint16_t>; // tf_id, seq_id
+
+		static constexpr size_t IPV4_HEADER_SIZE {20};
+		static constexpr size_t IPV6_HEADER_SIZE {40}; // bru
+		static constexpr size_t UDP_HEADER_SIZE {8};
+
+		// TODO: tcp AND IPv6 will be different
+		static constexpr size_t segment_overhead {
+			4+ // ft overhead
+			46+ // tox?
+			UDP_HEADER_SIZE+
+			IPV4_HEADER_SIZE
+		};
+
+		static constexpr size_t maximum_segment_size {496 + segment_overhead}; // tox 500 - 4 from ft
+		static_assert(maximum_segment_size == 574); // measured in wireshark
+
+		// ledbat++ says 60ms, we might need other values if relayed
+		// static constexpr rather than a const member, so LEDBAT stays assignable
+		static constexpr float target_delay {0.060f};
+		//const float target_delay {0.030f};
+		//const float target_delay {0.120f}; // 2x if relayed?
+
+		// TODO: use a factor for multiple of rtt
+		static constexpr size_t current_delay_filter_window {16*4*2};
+
+		//static constexpr size_t rtt_buffer_size_max {2000};
+
+		float max_byterate_allowed {10*1024*1024}; // 10MiB/s
+		//float max_byterate_allowed {2*1024*1024};
+
+	public:
+		LEDBAT(void);
+
+		// return the current believed window in bytes of how much data can be inflight,
+		// without overstepping the delay requirement
+		float getCWnD(void) const {
+			return _cwnd;
+		}
+
+		// TODO: api for how much data we should send
+		// take time since last sent into account
+		// respect max_byterate_allowed
+		size_t canSend(void) const;
+
+	public: // callbacks
+		// data size is without overhead
+		void onSent(SeqIDType seq, size_t data_size);
+
+		void onAck(std::vector<SeqIDType> seqs);
+
+		// if discard, not resent, not inflight
+		void onLoss(SeqIDType seq, bool discard);
+
+	private:
+		using clock = std::chrono::steady_clock;
+
+		// make values relative to algo start for readability (and precision)
+		// get timestamp in seconds
+		// NOTE: float seconds lose sub-millisecond resolution after a few hours
+		float getTimeNow(void) const {
+			return std::chrono::duration<float>{clock::now() - _time_start_offset}.count();
+		}
+
+		// moving avg over the last few delay samples
+		// VERY sensitive to bundling acks
+		float getCurrentDelay(void) const;
+
+		void addRTT(float new_delay);
+
+		void updateWindows(void);
+
+	private: // state
+		//float _cto {2.f}; // congestion timeout value in seconds
+
+		float _cwnd {2.f * maximum_segment_size}; // in bytes
+		float _base_delay {2.f}; // lowest measured delay in _rtt_buffer in seconds
+
+		float _last_cwnd {0.f}; // timepoint of last cwnd correction
+		int64_t _recently_acked_data {0}; // reset on _last_cwnd
+		bool _recently_lost_data {false};
+		int64_t _recently_sent_bytes {0};
+
+		// initialize to low value, will get corrected very fast
+		float _fwnd {0.01f * max_byterate_allowed}; // in bytes
+
+
+		// ssthresh
+
+		// spec recommends 10min
+		// TODO: optimize and divide into spans of 1min (spec recom)
+		std::deque<float> _tmp_rtt_buffer;
+		std::deque<std::pair<float, float>> _rtt_buffer; // timepoint, delay
+		std::deque<float> _rtt_buffer_minutes;
+
+		// list of sequence ids and timestamps of when they were sent
+		std::deque<std::tuple<SeqIDType, float, size_t>> _in_flight;
+
+		int64_t _in_flight_bytes {0};
+
+	private: // helper
+		clock::time_point _time_start_offset;
+};
+
diff --git a/ngc_ft1.cpp b/ngc_ft1.cpp
index 241fe3a..bee7c62 100644
--- a/ngc_ft1.cpp
+++ b/ngc_ft1.cpp
@@ -2,6 +2,9 @@
 
 #include "ngc_ext.hpp"
 
+#include "./ledbat.hpp"
+
+#include <cstdint>
 #include
 #include
 #include
@@ -11,6 +14,7 @@
 #include
 #include
 #include
+#include <algorithm>
 
 struct SendSequenceBuffer {
 	struct SSBEntry {
@@ -114,6 +118,8 @@ struct NGC_FT1 {
 
 	struct Group {
 		struct Peer {
+			LEDBAT cca;
+
 			struct RecvTransfer {
 				uint32_t file_kind;
 				std::vector<uint8_t> file_id;
@@ -240,7 +246,9 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 								tf.ssb.for_each(time_delta, [&](uint16_t id, const std::vector<uint8_t>& data, float& time_since_activity) {
 									// no ack after 5 sec -> resend
 									if (time_since_activity >= ngc_ft1_ctx->options.sending_resend_without_ack_after) {
+										// TODO: can fail
 										_send_pkg_FT1_DATA(tox, group_number, peer_number, idx, id, data.data(), data.size());
+										peer.cca.onLoss({idx, id}, false);
 										time_since_activity = 0.f;
 									}
 								});
@@ -249,6 +257,12 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 									// no ack after 30sec, close ft
 									// TODO: notify app
 									fprintf(stderr, "FT: warning, sending ft in progress timed out, deleting\n");
+
+									// clean up cca
+									tf.ssb.for_each(time_delta, [&](uint16_t id, const std::vector<uint8_t>& data, float& time_since_activity) {
+										peer.cca.onLoss({idx, id}, true);
+									});
+
 									tf_opt.reset();
 									continue; // dangerous control flow
 								}
@@ -256,11 +270,23 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 								assert(ngc_ft1_ctx->cb_send_data.count(tf.file_kind));
 
 								// if chunks in flight < window size (2)
-								while (tf.ssb.size() < ngc_ft1_ctx->options.packet_window_size) {
+								//while (tf.ssb.size() < ngc_ft1_ctx->options.packet_window_size) {
+								int64_t can_packet_size {static_cast<int64_t>(peer.cca.canSend())};
+								//if (can_packet_size) {
+									//std::cerr << "FT: can_packet_size: " << can_packet_size;
+								//}
+								size_t count {0};
+								while (can_packet_size > 0 && tf.file_size > 0) {
 									std::vector<uint8_t> new_data;
 
 									// TODO: parameterize packet size? -> only if JF increases lossy packet size >:)
-									size_t chunk_size = std::min<size_t>(496u, tf.file_size - tf.file_size_current);
+									//size_t chunk_size = std::min<size_t>(496u, tf.file_size - tf.file_size_current);
+									//size_t chunk_size = std::min(can_packet_size, tf.file_size - tf.file_size_current);
+									size_t chunk_size = std::min<int64_t>({
+										496u,
+										can_packet_size,
+										static_cast<int64_t>(tf.file_size - tf.file_size_current)
+									});
 									if (chunk_size == 0) {
 										tf.state = State::FINISHING;
 										break; // we done
@@ -278,13 +304,19 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 									);
 									uint16_t seq_id = tf.ssb.add(std::move(new_data));
 									_send_pkg_FT1_DATA(tox, group_number, peer_number, idx, seq_id, tf.ssb.entries.at(seq_id).data.data(), tf.ssb.entries.at(seq_id).data.size());
+									peer.cca.onSent({idx, seq_id}, chunk_size);
 
 #if defined(EXTRA_LOGGING) && EXTRA_LOGGING == 1
 									fprintf(stderr, "FT: sent data size: %ld (seq %d)\n", chunk_size, seq_id);
 #endif
 
 									tf.file_size_current += chunk_size;
+									can_packet_size -= chunk_size;
+									count++;
 								}
+								//if (count) {
+									//std::cerr << " split over " << count << "\n";
+								//}
 							}
 							break;
 						case State::FINISHING: // we still have unacked packets
@@ -292,6 +324,7 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 								// no ack after 5 sec -> resend
 								if (time_since_activity >= ngc_ft1_ctx->options.sending_resend_without_ack_after) {
 									_send_pkg_FT1_DATA(tox, group_number, peer_number, idx, id, data.data(), data.size());
+									peer.cca.onLoss({idx, id}, false);
 									time_since_activity = 0.f;
 								}
 							});
@@ -299,6 +332,12 @@ void NGC_FT1_iterate(Tox *tox, NGC_FT1* ngc_ft1_ctx, float time_delta) {
 								// no ack after 30sec, close ft
 								// TODO: notify app
 								fprintf(stderr, "FT: warning, sending ft finishing timed out, deleting\n");
+
+								// clean up cca
+								tf.ssb.for_each(time_delta, [&](uint16_t id, const std::vector<uint8_t>& data, float& time_since_activity) {
+									peer.cca.onLoss({idx, id}, true);
+								});
+
 								tf_opt.reset();
 							}
 							break;
@@ -828,12 +867,15 @@ static void _handle_FT1_DATA_ACK(
 
 	transfer.time_since_activity = 0.f;
 
+	std::vector<LEDBAT::SeqIDType> seqs;
 	while (curser < length) {
 		uint16_t seq_id = data[curser++];
 		seq_id |= data[curser++] << (1*8);
 
+		seqs.push_back({transfer_id, seq_id});
 		transfer.ssb.erase(seq_id);
 	}
+	peer.cca.onAck(std::move(seqs));
 
 	// delete if all packets acked
 	if (transfer.file_size == transfer.file_size_current && transfer.ssb.size() == 0) {