diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b34bbb..6df6786 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,6 +38,7 @@ endif()
 
 add_subdirectory(./prototyping EXCLUDE_FROM_ALL)
 add_subdirectory(./version0)
+add_subdirectory(./version1)
 
 add_subdirectory(./bench)
 
diff --git a/version1/CMakeLists.txt b/version1/CMakeLists.txt
new file mode 100644
index 0000000..58486b3
--- /dev/null
+++ b/version1/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+
+project(crdt_version1 CXX C)
+
+add_library(crdt_version1 INTERFACE)
+
+target_compile_features(crdt_version1 INTERFACE cxx_std_17)
+
+target_include_directories(crdt_version1 INTERFACE "${PROJECT_SOURCE_DIR}")
+
+########################################
+
+add_executable(v1_test1
+    ./test1.cpp
+)
+
+target_link_libraries(v1_test1 PUBLIC crdt_version1)
+
+########################################
+
+#add_executable(v1_test2
+    #./test2.cpp
+#)
+
+#target_link_libraries(v1_test2 PUBLIC crdt_version1)
+
diff --git a/version1/green_crdt/v1/list.hpp b/version1/green_crdt/v1/list.hpp
new file mode 100644
index 0000000..3589241
--- /dev/null
+++ b/version1/green_crdt/v1/list.hpp
@@ -0,0 +1,339 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <optional>
+#include <vector>
+
+#include <cassert>
+
+#if !defined(extra_assert)
+    #if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
+        #define extra_assert(...) assert(__VA_ARGS__)
+    #else
+        #define extra_assert(...) void(0)
+    #endif
+#endif
+
+namespace GreenCRDT::V1 {
+
+// list CRDT (YjsMod / YATA style integration) holding ValueType values authored by ActorType actors
+// deleted entries stay in `list` as tombstones (entries whose `value` is empty)
+template<typename ActorType, typename ValueType>
+struct List {
+    // for public interface
+    struct ListID {
+        ActorType id;
+        uint64_t seq{0}; // strictly increasing for that actor
+
+        bool operator<(const ListID& rhs) const {
+            if (seq < rhs.seq) {
+                return true;
+            } else if (seq > rhs.seq) {
+                return false;
+            } else { // ==
+                return id < rhs.id;
+            }
+        }
+
+        bool operator==(const ListID& rhs) const {
+            return seq == rhs.seq && id == rhs.id;
+        }
+
+        bool operator!=(const ListID& rhs) const {
+            return seq != rhs.seq || id != rhs.id;
+        }
+    };
+
+    struct ListIDInternal {
+        size_t actor_idx{0};
+        uint64_t seq{0}; // strictly increasing for that actor
+
+        bool operator==(const ListIDInternal& rhs) const {
+            return seq == rhs.seq && actor_idx == rhs.actor_idx;
+        }
+    };
+
+    // internally the index into this array is used to refer to an actor
+    std::vector<ActorType> _actors;
+
+    // TODO: replace with SoA
+    struct Entry {
+        ListIDInternal id;
+
+        // Yjs
+        std::optional<ListIDInternal> parent_left;
+        std::optional<ListIDInternal> parent_right;
+
+        // might be deleted (yes, *sigh*, CRDTs need tombstones)
+        std::optional<ValueType> value;
+    };
+
+    // TODO: use something better, edit: this seems fine
+    std::vector<Entry> list;
+
+    // number of not deleted entries
+    size_t doc_size {0};
+
+    // actor index -> seq of the last add() that succeeded for that actor
+    std::map<size_t, uint64_t> last_seen_seq;
+
+    // returns the index of actor in _actors, nullopt if the actor is unknown
+    std::optional<size_t> findActor(const ActorType& actor) const {
+        for (size_t i = 0; i < _actors.size(); i++) {
+            if (_actors[i] == actor) {
+                return i;
+            }
+        }
+        return std::nullopt;
+    }
+
+    // returns the position in list of the entry with this internal id (linear scan), nullopt if absent
+    std::optional<size_t> findIdx(const ListIDInternal& list_id) const {
+        extra_assert(verify());
+
+        for (size_t i = 0; i < list.size(); i++) {
+            if (list[i].id == list_id) {
+                return i;
+            }
+        }
+
+        return std::nullopt;
+    }
+
+    // same lookup by public id, nullopt if the actor or the entry is unknown
+    std::optional<size_t> findIdx(const ListID& list_id) const {
+        extra_assert(verify());
+
+        const auto actor_idx_opt = findActor(list_id.id);
+        if (!actor_idx_opt.has_value()) {
+            return std::nullopt;
+        }
+
+        const ListIDInternal tmp_id {actor_idx_opt.value(), list_id.seq};
+
+        for (size_t i = 0; i < list.size(); i++) {
+            if (list[i].id == tmp_id) {
+                return i;
+            }
+        }
+
+        return std::nullopt;
+    }
+
+    // inserts value between parent_left and parent_right (nullopt = start / end of the doc)
+    // returns false if ops are missing (unknown parent, or seq is not the next one for that actor)
+    // based on YjsMod https://github.com/josephg/reference-crdts/blob/9f4f9c3a97b497e2df8ae4473d1e521d3c3bf2d2/crdts.ts#L293-L348
+    // which is a modified Yjs(YATA) algo
+    bool add(const ListID& list_id, const ValueType& value, const std::optional<ListID>& parent_left, const std::optional<ListID>& parent_right) {
+        extra_assert(verify());
+
+        size_t actor_idx {0};
+        { // new actor?
+            // add, even if op fails
+            const auto actor_opt = findActor(list_id.id);
+            if (!actor_opt.has_value()) {
+                actor_idx = _actors.size();
+                _actors.push_back(list_id.id);
+            } else {
+                actor_idx = actor_opt.value();
+            }
+        }
+
+        // check actor op order
+        if (!last_seen_seq.count(actor_idx)) {
+            // we don't know this actor yet, first seq needs to be 0
+            if (list_id.seq != 0) {
+                return false;
+            }
+        } else {
+            // making sure we don't skip operations by that actor
+            if (list_id.seq != last_seen_seq.at(actor_idx) + 1) {
+                return false;
+            }
+        }
+
+        size_t insert_idx = 0;
+        if (list.empty()) {
+            if (parent_left.has_value() || parent_right.has_value()) {
+                // empty, missing parents
+                return false;
+            }
+        } else {
+            // find left
+            std::optional<size_t> left_idx = std::nullopt;
+            if (parent_left.has_value()) {
+                left_idx = findIdx(parent_left.value());
+                if (!left_idx.has_value()) {
+                    // missing parent left
+                    return false;
+                }
+
+                // vector insertion happens before the index, so go one past the left parent
+                insert_idx = left_idx.value() + 1;
+            } // else insert_idx = 0
+
+            // find right
+            size_t right_idx = list.size();
+            if (parent_right.has_value()) {
+                auto tmp_right = findIdx(parent_right.value());
+                if (!tmp_right.has_value()) {
+                    return false;
+                }
+                right_idx = tmp_right.value();
+            }
+
+            bool scanning {false};
+
+            for(size_t i = insert_idx;; i++) {
+                if (!scanning) {
+                    insert_idx = i;
+                }
+                // compare the cursor i, not insert_idx: insert_idx is frozen while
+                // scanning, so testing it would let i run past right_idx and list.size()
+                // if right parent / end of doc, insert
+                if (i == right_idx) {
+                    break;
+                }
+                // we ran past right o.o ?
+                if (i == list.size()) {
+                    break;
+                }
+
+                const Entry& at_i = list[i];
+                // parents left and right
+                std::optional<size_t> i_left_idx {std::nullopt};
+                if (at_i.parent_left.has_value()) {
+                    i_left_idx = findIdx(at_i.parent_left.value());
+                    if (!i_left_idx.has_value()) {
+                        assert(false && "item in list with unknown parent left!!");
+                        return false;
+                    }
+                }
+
+                // possibility map
+                //
+                //         | ir < r | ir == r       | ir > r
+                // -------------------------------------
+                // il < l  | insert | insert        | insert
+                // il == l | ?      | agentfallback | ?
+                // il > l  | skip   | skip          | skip
+
+                if (i_left_idx < left_idx) {
+                    break;
+                } else if (i_left_idx == left_idx) {
+                    // get i parent_right
+                    size_t i_right_idx = list.size();
+                    if (at_i.parent_right.has_value()) {
+                        auto tmp_right = findIdx(at_i.parent_right.value());
+                        if (!tmp_right.has_value()) {
+                            assert(false && "item in list with unknown parent right!!");
+                            return false;
+                        }
+                        i_right_idx = tmp_right.value();
+                    }
+
+                    if (i_right_idx < right_idx) {
+                        scanning = true;
+                    } else if (i_right_idx == right_idx) {
+                        // actor id tie breaker
+                        if (_actors[actor_idx] < _actors[at_i.id.actor_idx]) {
+                            break;
+                        } else {
+                            scanning = false;
+                        }
+                    } else { // i_right_idx > right_idx
+                        scanning = false;
+                    }
+                } else { // il > l
+                    // do nothing
+                }
+            }
+        }
+
+        { // actual insert
+            Entry new_entry;
+
+            new_entry.id.actor_idx = actor_idx;
+            new_entry.id.seq = list_id.seq;
+
+            if (parent_left.has_value()) {
+                new_entry.parent_left = ListIDInternal{findActor(parent_left.value().id).value(), parent_left.value().seq};
+            }
+
+            if (parent_right.has_value()) {
+                new_entry.parent_right = ListIDInternal{findActor(parent_right.value().id).value(), parent_right.value().seq};
+            }
+
+            new_entry.value = value;
+
+            list.emplace(list.begin() + insert_idx, new_entry);
+        }
+
+        doc_size++;
+        last_seen_seq[actor_idx] = list_id.seq;
+
+        extra_assert(verify());
+        return true;
+    }
+
+    // marks the entry as deleted (tombstone); returns false if not found or already deleted
+    bool del(const ListID& id) {
+        extra_assert(verify());
+
+        auto actor_idx_opt = findActor(id.id);
+        if (!actor_idx_opt.has_value()) {
+            // we don't have anything with that actor
+            return false;
+        }
+
+        const ListIDInternal tmp_id {actor_idx_opt.value(), id.seq};
+
+        for (auto& it : list) {
+            if (it.id == tmp_id) {
+                if (it.value.has_value()) {
+                    it.value.reset();
+
+                    doc_size--;
+                    extra_assert(verify());
+                    return true;
+                } else {
+                    extra_assert(verify());
+                    return false; // TODO: allow double deletes?,,,, need ids
+                }
+            }
+        }
+
+        extra_assert(verify());
+        return false;
+    }
+
+    // returns the values of all not deleted entries, in list order
+    std::vector<ValueType> getArray(void) const {
+        std::vector<ValueType> array;
+        for (const auto& e : list) {
+            if (e.value.has_value()) {
+                array.push_back(e.value.value());
+            }
+        }
+
+        return array;
+    }
+
+    // checks that doc_size equals the number of not deleted entries
+    // TODO: only in debug?
+    bool verify(void) const {
+        size_t actual_size = 0;
+        for (const auto& it : list) {
+            if (it.value.has_value()) {
+                actual_size++;
+            }
+        }
+        //assert(doc_size == actual_size);
+        return doc_size == actual_size;
+    }
+};
+
+} // GreenCRDT::V1
+
diff --git a/version1/test1.cpp b/version1/test1.cpp
new file mode 100644
index 0000000..7aedcfa
--- /dev/null
+++ b/version1/test1.cpp
@@ -0,0 +1,236 @@
+#define EXTRA_ASSERTS 1
+#include <green_crdt/v1/list.hpp>
+
+#include <cassert>
+#include <iostream>
+#include <numeric>
+#include <optional>
+#include <random>
+#include <string>
+#include <string_view>
+#include <vector>
+
+// single letter actor, for testing only
+using Actor = char;
+using ListType = GreenCRDT::V1::List<Actor, char>;
+
+namespace std {
+bool operator==(const std::vector<char>& lhs, const std::string_view& rhs) {
+    if (lhs.size() != rhs.size()) {
+        return false;
+    }
+
+    for (size_t i = 0; i < rhs.size(); i++) {
+        if (lhs[i] != rhs[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+} // namespace std
+
+void testSingle1(void) {
+    ListType list;
+
+    assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+    assert(list.add({'A', 1}, 'b', ListType::ListID{'A', 0u}, std::nullopt));
+
+    assert(list.getArray() == "ab");
+}
+
+
+void testConcurrent1(void) {
+    // agent_a < agent_b
+
+    // concurrent insert of first element
+    { // variant 1, a then b
+        ListType list;
+        assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+        assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
+
+        assert(list.getArray() == "ab");
+    }
+    { // variant 2, b then a
+        ListType list;
+        assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
+        assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+
+        assert(list.getArray() == "ab");
+    }
+}
+
+struct AddOp {
+    ListType::ListID id;
+    char value;
+    std::optional<ListType::ListID> parent_left;
+    std::optional<ListType::ListID> parent_right;
+};
+
+void randomAddPermutations(const std::vector<AddOp>& ops, const std::string& expected) {
+    // TODO: more than 1k?
+    for (size_t i = 0; i < 1000; i++) {
+        std::minstd_rand rng(1337 + i);
+        std::vector<size_t> ops_todo(ops.size());
+        std::iota(ops_todo.begin(), ops_todo.end(), 0u);
+
+        size_t attempts {0};
+
+        ListType list;
+        do {
+            size_t idx = rng() % ops_todo.size();
+
+            if (list.add(ops[ops_todo[idx]].id, ops[ops_todo[idx]].value, ops[ops_todo[idx]].parent_left, ops[ops_todo[idx]].parent_right)) {
+                // only remove if it was possible -> returned true;
+                ops_todo.erase(ops_todo.begin()+idx);
+            }
+
+            attempts++;
+            assert(attempts < 10'000); // in case we run into an endless loop
+        } while (!ops_todo.empty());
+
+        assert(list.getArray() == expected);
+    }
+}
+
+void testInterleave1(void) {
+    const std::vector<AddOp> ops {
+        {{'A', 0u}, 'a', std::nullopt, std::nullopt},
+        {{'A', 1u}, 'a', ListType::ListID{'A', 0u}, std::nullopt},
+        {{'A', 2u}, 'a', ListType::ListID{'A', 1u}, std::nullopt},
+        {{'B', 0u}, 'b', std::nullopt, std::nullopt},
+        {{'B', 1u}, 'b', ListType::ListID{'B', 0u}, std::nullopt},
+        {{'B', 2u}, 'b', ListType::ListID{'B', 1u}, std::nullopt},
+    };
+
+    randomAddPermutations(ops, "aaabbb");
+}
+
+void testInterleave2(void) {
+    const std::vector<AddOp> ops {
+        {{'A', 0u}, 'a', std::nullopt, std::nullopt},
+        {{'A', 1u}, 'a', std::nullopt, ListType::ListID{'A', 0u}},
+        {{'A', 2u}, 'a', std::nullopt, ListType::ListID{'A', 1u}},
+        {{'B', 0u}, 'b', std::nullopt, std::nullopt},
+        {{'B', 1u}, 'b', std::nullopt, ListType::ListID{'B', 0u}},
+        {{'B', 2u}, 'b', std::nullopt, ListType::ListID{'B', 1u}},
+    };
+
+    randomAddPermutations(ops, "aaabbb");
+}
+
+void testConcurrent2(void) {
+    const std::vector<AddOp> ops {
+        {{'A', 0u}, 'a', std::nullopt, std::nullopt},
+        {{'C', 0u}, 'c', std::nullopt, std::nullopt},
+        {{'B', 0u}, 'b', std::nullopt, std::nullopt},
+        {{'D', 0u}, 'd', ListType::ListID{'A', 0u}, ListType::ListID{'C', 0u}},
+    };
+
+    randomAddPermutations(ops, "adbc");
+}
+
+// regression: with parents that are not adjacent, add() is still scanning when it
+// reaches the end of the doc; it must stop there instead of reading past the list
+void testMalformedParents1(void) {
+    ListType list;
+
+    assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+    assert(list.add({'A', 1}, 'b', ListType::ListID{'A', 0u}, std::nullopt));
+    // 'B' names no left parent but 'b' as right parent, although 'a' sits in between
+    assert(list.add({'B', 0}, 'c', std::nullopt, ListType::ListID{'A', 1u}));
+    assert(list.getArray() == "acb");
+
+    // scanning starts at 'c' and is still active when the end of the doc is reached
+    assert(list.add({'C', 0}, 'd', std::nullopt, std::nullopt));
+    assert(list.getArray() == "adcb");
+}
+
+void testMain1(void) {
+    ListType list;
+
+    static_assert('0' < '1');
+
+    const std::vector<AddOp> a0_ops {
+        {{'0', 0u}, 'a', std::nullopt, std::nullopt},
+        {{'0', 1u}, 'b', ListType::ListID{'0', 0u}, std::nullopt},
+        {{'0', 2u}, 'c', ListType::ListID{'0', 1u}, std::nullopt},
+        {{'0', 3u}, 'd', ListType::ListID{'0', 1u}, ListType::ListID{'0', 2u}},
+    };
+
+    const std::vector<AddOp> a1_ops {
+        // knows of a0 up to {a0, 1}
+        {{'1', 0u}, 'z', ListType::ListID{'0', 0u}, ListType::ListID{'0', 1u}},
+        {{'1', 1u}, 'y', ListType::ListID{'0', 1u}, std::nullopt},
+    };
+
+    { // the ez, in order stuff
+        // a0 insert first char, 'a', since it's the first, we don't have any parents
+        assert(list.add(a0_ops[0].id, a0_ops[0].value, a0_ops[0].parent_left, a0_ops[0].parent_right));
+        assert(list.getArray() == "a");
+
+        // a0 insert second char, 'b' after 'a', no parents to right
+        assert(list.add(a0_ops[1].id, a0_ops[1].value, a0_ops[1].parent_left, a0_ops[1].parent_right));
+        assert(list.getArray() == "ab");
+
+        // a0 insert 'c' after 'b', no parents to right
+        assert(list.add(a0_ops[2].id, a0_ops[2].value, a0_ops[2].parent_left, a0_ops[2].parent_right));
+        assert(list.getArray() == "abc");
+
+        // a0 insert 'd' after 'b', 'c' parent right
+        assert(list.add(a0_ops[3].id, a0_ops[3].value, a0_ops[3].parent_left, a0_ops[3].parent_right));
+        assert(list.getArray() == "abdc");
+
+        // a1 insert 'z' after 'a', 'b' parent right
+        assert(list.add(a1_ops[0].id, a1_ops[0].value, a1_ops[0].parent_left, a1_ops[0].parent_right));
+        assert(list.getArray() == "azbdc");
+    }
+
+    std::cout << "done with ez\n";
+
+    { // a1 was not up to date, only had 0,1 of a0
+        // a1 insert 'y' after 'b', no parent right
+        assert(list.add(a1_ops[1].id, a1_ops[1].value, a1_ops[1].parent_left, a1_ops[1].parent_right));
+        assert(list.getArray() == "azbdcy");
+    }
+
+    std::cout << "\ndoc size (with tombstones): " << list.list.size() << "\n";
+    std::cout << "\ndoc size: " << list.doc_size << "\n";
+    std::cout << "doc text:\n";
+
+    const auto tmp_array = list.getArray();
+    std::cout << std::string_view(tmp_array.data(), tmp_array.size()) << "\n";
+}
+
+int main(void) {
+    std::cout << "testSingle1:\n";
+    testSingle1();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testConcurrent1:\n";
+    testConcurrent1();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testInterleave1:\n";
+    testInterleave1();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testInterleave2:\n";
+    testInterleave2();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testConcurrent2:\n";
+    testConcurrent2();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testMalformedParents1:\n";
+    testMalformedParents1();
+    std::cout << std::string(40, '-') << "\n";
+
+    std::cout << "testMain1:\n";
+    testMain1();
+    std::cout << std::string(40, '-') << "\n";
+
+    return 0;
+}
+