From d85a2dc191666ff6835ccafd6be532687c2fefe6 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Fri, 23 Dec 2022 03:16:38 +0100 Subject: [PATCH] add text document to v3, planing v4 --- CMakeLists.txt | 1 + version3/CMakeLists.txt | 8 +- version3/green_crdt/v3/list.hpp | 34 +- version3/green_crdt/v3/text_document.hpp | 305 ++++++++++ version3/test2.cpp | 701 +++++++++++++++++++++++ version4/CMakeLists.txt | 26 + version4/green_crdt/v4/list.hpp | 381 ++++++++++++ version4/test1.cpp | 214 +++++++ 8 files changed, 1659 insertions(+), 11 deletions(-) create mode 100644 version3/green_crdt/v3/text_document.hpp create mode 100644 version3/test2.cpp create mode 100644 version4/CMakeLists.txt create mode 100644 version4/green_crdt/v4/list.hpp create mode 100644 version4/test1.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 23fa7b4..4d365f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ add_subdirectory(./version0) add_subdirectory(./version1) add_subdirectory(./version2) add_subdirectory(./version3) +#add_subdirectory(./version4) add_subdirectory(./bench) diff --git a/version3/CMakeLists.txt b/version3/CMakeLists.txt index 8807a09..eee152d 100644 --- a/version3/CMakeLists.txt +++ b/version3/CMakeLists.txt @@ -18,9 +18,9 @@ target_link_libraries(v3_test1 PUBLIC crdt_version3) ######################################## -#add_executable(v3_test2 - #./test2.cpp -#) +add_executable(v3_test2 + ./test2.cpp +) -#target_link_libraries(v3_test2 PUBLIC crdt_version3) +target_link_libraries(v3_test2 PUBLIC crdt_version3) diff --git a/version3/green_crdt/v3/list.hpp b/version3/green_crdt/v3/list.hpp index 89505b9..2cb551c 100644 --- a/version3/green_crdt/v3/list.hpp +++ b/version3/green_crdt/v3/list.hpp @@ -80,7 +80,7 @@ struct List { //size_t _stat_find_with_hint{0}; //size_t _stat_find_with_hint_hit{0}; - std::optional findActor(const ActorType& actor) const { + [[nodiscard]] std::optional findActor(const ActorType& actor) const { for (size_t i = 0; i < 
_actors.size(); i++) { if (_actors[i] == actor) { return i; @@ -89,7 +89,7 @@ struct List { return std::nullopt; } - std::optional findIdx(const ListIDInternal& list_id) const { + [[nodiscard]] std::optional findIdx(const ListIDInternal& list_id) const { extra_assert(verify()); for (size_t i = 0; i < _list_ids.size(); i++) { @@ -102,7 +102,7 @@ struct List { } // search close to hint first - std::optional findIdx(const ListIDInternal& list_id, size_t hint) const { + [[nodiscard]] std::optional findIdx(const ListIDInternal& list_id, size_t hint) const { extra_assert(verify()); //_stat_find_with_hint++; @@ -140,7 +140,7 @@ struct List { return findIdx(list_id); } - std::optional findIdx(const ListID& list_id) const { + [[nodiscard]] std::optional findIdx(const ListID& list_id) const { extra_assert(verify()); const auto actor_idx_opt = findActor(list_id.id); @@ -153,7 +153,7 @@ struct List { return findIdx(tmp_id); } - std::optional findIdx(const ListID& list_id, size_t hint) const { + [[nodiscard]] std::optional findIdx(const ListID& list_id, size_t hint) const { extra_assert(verify()); const auto actor_idx_opt = findActor(list_id.id); @@ -369,12 +369,32 @@ struct List { return false; } + [[nodiscard]] bool empty(void) const { + return _list_ids.empty(); + } + + [[nodiscard]] size_t size(void) const { + return _list_ids.size(); + } + + [[nodiscard]] ListIDInternal getIDInternal(size_t idx) const { + return _list_ids.at(idx); + } + + [[nodiscard]] const ListID getID(size_t idx) const { + return {_actors.at(_list_ids.at(idx).actor_idx), _list_ids.at(idx).seq}; + } + + [[nodiscard]] const std::optional& getValue(size_t idx) const { + return _list_data.at(idx).value; + } + // returns the size of alive entries - size_t getDocSize(void) const { + [[nodiscard]] size_t getDocSize(void) const { return _doc_size; } - std::vector getArray(void) const { + [[nodiscard]] std::vector getArray(void) const { std::vector array; for (const auto& e : _list_data) { if 
(e.value.has_value()) { diff --git a/version3/green_crdt/v3/text_document.hpp b/version3/green_crdt/v3/text_document.hpp new file mode 100644 index 0000000..31b9022 --- /dev/null +++ b/version3/green_crdt/v3/text_document.hpp @@ -0,0 +1,305 @@ +#pragma once + +#include "./list.hpp" + +#include + +//#include // debug + +namespace GreenCRDT::V3 { + +template +struct TextDocument { + // TODO: determine if char is the best + using ListType = List; + + struct OpAdd { + typename ListType::ListID id; + + std::optional parent_left; + std::optional parent_right; + + char value; + }; + + struct OpDel { + typename ListType::ListID id; + }; + + using Op = std::variant; + + //// TODO: implement + //struct Cursor { + //AgentType who; + //typename ListType::ListID pos; + //}; + + ActorType local_actor; + + ListType state; + + [[nodiscard]] std::string getText(void) const { + std::string text; + + for (const auto& it : state._list_data) { + if (it.value.has_value()) { + text += it.value.value(); + } + } + + return text; + } + + bool apply(const Op& op) { + if(std::holds_alternative(op)) { + const auto& add_op = std::get(op); + //std::cout << "a:" << add_op.id.id << " s:" << add_op.id.seq << " v:" << add_op.value << "\n"; + return state.add(add_op.id, add_op.value, add_op.parent_left, add_op.parent_right); + } else if (std::holds_alternative(op)) { + const auto& del_op = std::get(op); + return state.del(del_op.id); + } else { + assert(false); return false; // unknown alternative: report failure instead of falling off the end of a non-void function when asserts are compiled out + } + } + + bool apply(const std::vector& ops) { + for (const auto& op : ops) { + if (!apply(op)) { + // this is not ideal, since we might have applied some, and don't report which/how many + return false; + } + } + + return true; + } + + static std::vector text2adds( + const ActorType& actor, uint64_t seq, // seq is the first seq + std::optional parent_left, + std::optional parent_right, + std::string_view text + ) { + std::vector ops; + for (size_t i = 0; i < text.size(); i++) { + typename ListType::ListID new_id {actor, seq++}; + + 
ops.emplace_back(OpAdd{ + new_id, + parent_left, + parent_right, + text[i] + }); + + parent_left = new_id; + } + + return ops; + } + + // adds text with the specified parents + // returns generated ops + std::vector addText( + std::optional parent_left, + std::optional parent_right, + std::string_view text + ) { + // TODO: move actor setting to list + if (!state.findActor(local_actor).has_value()) { + state._actors.push_back(local_actor); + } + + // TODO: look up typesystem and fix (move? decltype?) + std::vector ops = text2adds( + // TODO: abstract actors + local_actor, state._last_seen_seq.count(state.findActor(local_actor).value()) ? state._last_seen_seq[state.findActor(local_actor).value()]+1u : 0u, + parent_left, + parent_right, + text + ); + + // TODO: make this better + // and apply + for (const auto& op : ops) { + if(std::holds_alternative(op)) { + const auto& add_op = std::get(op); + //std::cout << "a:" << add_op.id.id << " s:" << add_op.id.seq << " v:" << add_op.value << "\n"; + bool r = state.add(add_op.id, add_op.value, add_op.parent_left, add_op.parent_right); + assert(r); + } else if (std::holds_alternative(op)) { + const auto& del_op = std::get(op); + state.del(del_op.id); + } else { + assert(false); + } + } + + return ops; // TODO: move? 
+ } + + // deletes everything in range [left, right); a nullopt bound means start/end of the list + // returns generated ops + std::vector delRange( + std::optional left, + std::optional right + ) { + size_t first_idx = 0; + if (left.has_value()) { + auto res = state.findIdx(left.value()); + if (!res.has_value()) { + assert(false && "cant find left"); + return {}; + } + first_idx = res.value(); + } + + size_t last_idx = state.size(); + if (right.has_value()) { + auto res = state.findIdx(right.value()); + if (!res.has_value()) { + assert(false && "cant find right"); + return {}; + } + last_idx = res.value(); + } + + std::vector ops; + + for (size_t i = first_idx; i < last_idx; i++) { + if (!state.getValue(i).has_value()) { + // already deleted + continue; + } + + ops.emplace_back(OpDel{ + //state.list.at(i).id + state.getID(i) + }); + + // TODO: do deletes get a seq????? + + state.del(state.getID(i)); + } + + return ops; + } + + // generates ops from the difference + // note: right now it only creates 1 diff patch + std::vector merge(std::string_view text) { + if (text.empty()) { + if (state.empty() || state.getDocSize() == 0) { + // no op + return {}; + } else { + // delete all + return delRange(std::nullopt, std::nullopt); + } + } + // text not empty + + if (state.empty()) { + return addText( + std::nullopt, + std::nullopt, + text + ); + } + // neither empty + + // find start and end of changes + // start + size_t list_start = 0; + size_t list_start_counted = 0; + size_t text_start = 0; + bool differ = false; + for (; list_start < state.size() && text_start < text.size();) { + // jump over tombstones + if (!state.getValue(list_start).has_value()) { + list_start++; + continue; + } + + if (state.getValue(list_start).value() != text[text_start]) { + differ = true; + break; + } + + list_start++; + text_start++; + list_start_counted++; + } + + // doc and text don't differ + if (!differ && list_start == state.size() && text_start == text.size()) { + return {}; + } + //std::cout << "list.size: " << state.list.size() << "(" 
<< getText().size() << ")" << " text.size: " << text.size() << "\n"; + //std::cout << "list_start: " << list_start << " text_start: " << text_start << "\n"; + + // +1 so i can have unsigned + size_t list_end = state.size(); + size_t text_end = text.size(); + //for (; list_end > 0 && text_end > 0 && list_end >= list_start && text_end >= text_start;) { + //while (list_end >= list_start && text_end >= text_start) { + size_t list_end_counted = 0; + differ = false; // var reuse + //while (list_start_counted - list_end_counted > state.doc_size && text_end >= text_start) { + while (state.getDocSize() - list_start_counted > list_end_counted && text_end > 0 && text_end >= text_start) { // text_end > 0 keeps text[text_end-1] in bounds when text_start == 0 + // jump over tombstones + if (!state.getValue(list_end-1).has_value()) { + list_end--; + continue; + } + + if (state.getValue(list_end-1).value() != text[text_end-1]) { + differ = true; + break; + } + + list_end--; + text_end--; + list_end_counted++; + } + + if (!differ && text_start == text_end+1) { + // we ran into each other without seeing the different char + // TODO: do we need to increment list_end? text_end? + list_end++; + } + + //std::cout << "list_end: " << list_end << " text_end: " << text_end << "\n"; + //std::cout << "substring before: " << text.substr(text_start, text.size() - state.doc_size) << "\n"; + + std::vector ops; + + // 1. clear range (del all list_start - list_end) + if (list_start <= list_end && list_start < state.size()) { + //list_end += list_start == list_end; + ops = delRange( + state.getID(list_start), + list_end < state.size() ? std::make_optional(state.getID(list_end)) : std::nullopt + ); + //std::cout << "deleted: " << ops.size() << "\n"; + } + + //std::cout << "text between: " << getText() << "\n"; + //std::cout << "substring between: " << text.substr(text_start, text.size() - state.doc_size) << "\n"; + + // 2. add range (add all text_start - text_end) + if (state.getDocSize() < text.size()) { + auto tmp_add_ops = addText( + list_start == 0 ? 
std::nullopt : std::make_optional(state.getID(list_start-1)), + list_start == state.size() ? std::nullopt :std::make_optional(state.getID(list_start)), + text.substr(text_start, text.size() - state.getDocSize()) + ); + //std::cout << "added: " << tmp_add_ops.size() << "\n"; + ops.insert(ops.end(), tmp_add_ops.begin(), tmp_add_ops.end()); + } + + return ops; + } +}; + +} // GreenCRDT::V3 + diff --git a/version3/test2.cpp b/version3/test2.cpp new file mode 100644 index 0000000..e68395f --- /dev/null +++ b/version3/test2.cpp @@ -0,0 +1,701 @@ +#include + +#include +#include +#include +#include +#include +#include + +// single letter agent, for testing only +using Agent = std::string; +using Doc = GreenCRDT::V3::TextDocument; +using Op = Doc::Op; +using ListType = Doc::ListType; + +// maybe switch it up? +//using Rng = std::minstd_rand; +//using Rng = std::mt19937; +using Rng = std::ranlux24_base; + +// 10*7 -> 70 permutations , ggwp +// | 1add | 1del | 1rep | 2add | 2del | 2rep | random add | random del | random rep | random +// empty doc | | 0 | 0 | | 0 | 0 | x | 0 | 0 | +// before 1 char | | | | | | | | | | +// after 1 char | | | | | | | | | | +// before 2 char | | | | | | | | | | +// in 2 char | | | | | | | | | | +// after 2 char | | | | | | | | | | +// random | | | | | | | | | | + +static const std::vector random_chars { + 'a', 'b', 'c', 'd', 'e', + 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', + 'z', + + 'A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', + 'Z', +}; + +std::ostream& operator<<(std::ostream& out, const std::optional& id) { + if (id.has_value()) { + out << id.value().id << "-" << id.value().seq; + } else { + out << "null"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, const Doc::OpAdd& op) { + out + << "{ id:" << op.id.id + << "-" << op.id.seq + << ", v:" << op.value + << ", l:" << 
op.parent_left + << ", r:" << op.parent_right + << " }" + ; + return out; +} + +// genX() changes doc, uses local agent + +Op genAdd(Rng& rng, Doc& doc) { + Doc::OpAdd op { + {doc.local_actor, 0u}, + std::nullopt, + std::nullopt, + random_chars[rng()%random_chars.size()] + }; + + // TODO: move to list + // make sure actor index exists + if (!doc.state.findActor(doc.local_actor).has_value()) { + doc.state._actors.push_back(doc.local_actor); + } + + // first id is 0 + if (doc.state._last_seen_seq.count(doc.state.findActor(doc.local_actor).value())) { + op.id.seq = doc.state._last_seen_seq[doc.state.findActor(doc.local_actor).value()] + 1; + } + + if (!doc.state.empty()) { + // gen parents + size_t li = rng()%(1+doc.state.size()); + if (li != doc.state.size()) { // nullopt + op.parent_left = doc.state.getID(li); + } + + //size_t r_range = 1+doc.state.list.size(); + //if (li != doc.state.list.size()) { + //r_range -= li+1; + //} + //size_t ri = rng()%r_range; + //if (li != doc.state.list.size()) { + //ri += li+1; + //} + //if (ri != doc.state.list.size()) { // nullopt + //op.parent_right = doc.state.list[li].id; + //} + + if (op.parent_left.has_value()) { + if (doc.state.size() != li + 1) { // left is not last + op.parent_right = doc.state.getID(li+1); + } + } else { + // left is before first, so right is first + op.parent_right = doc.state.getID(0); + } + } // else first char, both nullopt + + //std::cout << "op: " << op << "\n"; + + { + bool r = doc.state.add(op.id, op.value, op.parent_left, op.parent_right); + if (!r) { + std::cout << "op: " << op << "\n"; + } + assert(r); + } + + return op; +} + +Op genDel(Rng& rng, Doc& doc) { + if (doc.state.getDocSize() == 0) { + assert(false && "empty doc"); + return {}; // empty + } + + doc.state.verify(); + + Doc::OpDel op{}; + + // search for undelted entry + size_t idx = rng()%doc.state.size(); + bool found = false; + for (size_t attempts = 0; attempts <= doc.state.size(); attempts++) { + //if 
(doc.state.list[idx].value.has_value()) { + if (doc.state.getValue(idx).has_value()) { + op.id = doc.state.getID(idx); + found = true; + break; + } + idx = (idx+1) % doc.state.size(); + } + + assert(found); + + { + auto size_pre = doc.state.getDocSize(); + bool r = doc.state.del(op.id); + assert(r); + assert(size_pre-1 == doc.state.getDocSize()); + assert(doc.state.verify()); + } + + return op; +} + +//genRep() +//genAddContRange() +//genDelContRange() +//genRepContRange() + +//genRand() +//genRandRanges() +std::vector genRandAll(Rng& rng, Doc& doc) { + switch (rng() % 1) { + case 0: + return {genAdd(rng, doc)}; + } + + return {}; +} + +void testEmptyDocAdds(size_t seed) { + Rng rng(seed); + + Doc doc; // empty + doc.local_actor = 'A'; + + std::string changed_text; + { + // for modifying + Doc doctmp = doc; + + const size_t loop_count = (rng() % 55)+1; + for (size_t i = 0; i < loop_count; i++) { + genAdd(rng, doctmp); + } + + changed_text = doctmp.getText(); + } + + assert(doc.getText() != changed_text); + + std::cout << "changed_text: " << changed_text << "\n"; + + Doc otherdoc = doc; + assert(doc.getText().size() == doc.state.getDocSize()); + const auto merge_ops = doc.merge(changed_text); + assert(doc.getText().size() == doc.state.getDocSize()); + + assert(doc.getText() == changed_text); + + assert(otherdoc.apply(merge_ops)); + assert(doc.getText() == otherdoc.getText()); +} + +void test1CharDocAdds(size_t seed) { + Rng rng(seed); + + Doc doc; + doc.local_actor = 'A'; + + doc.addText(std::nullopt, std::nullopt, "0"); + + assert(doc.getText() == "0"); + + std::string changed_text; + { + // for modifying + Doc doctmp = doc; + + const size_t loop_count = (rng() % 4)+1; + for (size_t i = 0; i < loop_count; i++) { + genAdd(rng, doctmp); + } + + changed_text = doctmp.getText(); + } + + assert(doc.getText() != changed_text); + + std::cout << "text: " << doc.getText() << "\n"; + std::cout << "changed_text: " << changed_text << "\n"; + + Doc otherdoc = doc; + 
assert(doc.getText().size() == doc.state.getDocSize()); + const auto merge_ops = doc.merge(changed_text); + assert(doc.getText().size() == doc.state.getDocSize()); + + std::cout << "text after merge: " << doc.getText() << "\n"; + + assert(doc.getText() == changed_text); + + assert(otherdoc.apply(merge_ops)); + assert(doc.getText() == otherdoc.getText()); +} + +void test1CharDocDels(size_t seed) { + Rng rng(seed); + + Doc doc; + doc.local_actor = 'A'; + + assert(doc.getText().size() == doc.state.getDocSize()); + doc.addText(std::nullopt, std::nullopt, "0123"); + assert(doc.getText().size() == doc.state.getDocSize()); + + assert(doc.getText() == "0123"); + + std::string changed_text; + { + // for modifying + Doc doctmp = doc; + + const size_t loop_count = (rng() % 4)+1; + std::cout << "going to delete: " << loop_count << "\n"; + for (size_t i = 0; i < loop_count; i++) { + genDel(rng, doctmp); + } + + changed_text = doctmp.getText(); + assert(doctmp.getText().size() == doctmp.state.getDocSize()); + + if (loop_count == doc.state.getDocSize()) { + assert(doctmp.state.getDocSize() == 0); + assert(changed_text.size() == 0); + } + } + + assert(doc.getText() != changed_text); + + std::cout << "text: " << doc.getText() << "\n"; + std::cout << "changed_text: " << changed_text << "\n"; + + Doc otherdoc = doc; + assert(doc.getText().size() == doc.state.getDocSize()); + const auto merge_ops = doc.merge(changed_text); + assert(doc.getText().size() == doc.state.getDocSize()); + + std::cout << "text after merge: " << doc.getText() << "\n"; + + assert(doc.getText() == changed_text); + + assert(otherdoc.apply(merge_ops)); + assert(doc.getText() == otherdoc.getText()); +} + +void test2CharDocAdds(size_t seed) { + Rng rng(seed); + + Doc doc; + doc.local_actor = 'A'; + + assert(doc.getText().size() == doc.state.getDocSize()); + doc.addText(std::nullopt, std::nullopt, "012345"); + assert(doc.getText().size() == doc.state.getDocSize()); + + assert(doc.getText() == "012345"); + + 
std::string changed_text; + { + // for modifying + Doc doctmp = doc; + + const size_t loop_count = (rng() % 6)+1; + for (size_t i = 0; i < loop_count; i++) { + genAdd(rng, doctmp); + } + + changed_text = doctmp.getText(); + } + + assert(doc.getText() != changed_text); + + std::cout << "text: " << doc.getText() << "\n"; + std::cout << "changed_text: " << changed_text << "\n"; + + Doc otherdoc = doc; + assert(doc.getText().size() == doc.state.getDocSize()); + const auto merge_ops = doc.merge(changed_text); + assert(doc.getText().size() == doc.state.getDocSize()); + + std::cout << "text after merge: " << doc.getText() << "\n"; + + assert(doc.getText() == changed_text); + + assert(otherdoc.apply(merge_ops)); + assert(doc.getText() == otherdoc.getText()); +} + +void testChange1(size_t seed) { + Rng rng(seed); + + Doc doc; + doc.local_actor = 'A'; + + assert(doc.getText().size() == doc.state.getDocSize()); + doc.addText(std::nullopt, std::nullopt, "012345"); + assert(doc.getText().size() == doc.state.getDocSize()); + + assert(doc.getText() == "012345"); + + std::string changed_text; + { + // for modifying + Doc doctmp = doc; + + { // dels + const size_t loop_count = (rng() % 6)+1; + for (size_t i = 0; i < loop_count; i++) { + genDel(rng, doctmp); + } + } + + { // adds + const size_t loop_count = (rng() % 6)+1; + for (size_t i = 0; i < loop_count; i++) { + genAdd(rng, doctmp); + } + } + + changed_text = doctmp.getText(); + } + + assert(doc.getText() != changed_text); + + std::cout << "text: " << doc.getText() << "\n"; + std::cout << "changed_text: " << changed_text << "\n"; + + Doc otherdoc = doc; + assert(doc.getText().size() == doc.state.getDocSize()); + const auto merge_ops = doc.merge(changed_text); + assert(doc.getText().size() == doc.state.getDocSize()); + + std::cout << "text after merge: " << doc.getText() << "\n"; + + assert(doc.getText() == changed_text); + + assert(otherdoc.apply(merge_ops)); + assert(doc.getText() == otherdoc.getText()); +} + +void 
testBugSame(void) { + Doc doc; + doc.local_actor = 'A'; + + std::string_view new_text1{"a"}; + doc.merge(new_text1); + assert(doc.getText() == new_text1); + + std::string_view new_text2{"aa"}; + doc.merge(new_text2); + assert(doc.getText() == new_text2); +} + +void testBugDoubleDel(void) { + Doc doc; + doc.local_actor = 'A'; + + { + std::string_view new_text{"a"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{""}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + assert(std::holds_alternative(ops.front())); + assert(std::get(ops.front()).id.seq == 0); + } + + { + std::string_view new_text{""}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 0); + } +} + +void testBugSameDel(void) { + Doc doc; + doc.local_actor = 'A'; + + { + std::string_view new_text{"a"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"aa"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"a"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } +} + +void testBugSameDel2(void) { + Doc doc; + doc.local_actor = 'A'; + + { + std::string_view new_text{"a"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"aa"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"aaa"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"aa"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() 
== new_text); + assert(ops.size() == 1); + } + + { + std::string_view new_text{"a"}; + const auto ops = doc.merge(new_text); + assert(doc.getText() == new_text); + assert(ops.size() == 1); + } +} + +void testMulti1(void) { + Doc docA; + docA.local_actor = 'A'; + + Doc docB; + docB.local_actor = 'B'; + + // state A + { + std::string_view new_text{"iiiiiii"}; + const auto ops = docA.merge(new_text); + assert(docA.getText() == new_text); + + assert(docB.apply(ops)); + + assert(docB.getText() == new_text); + assert(docB.state.getDocSize() == docA.state.getDocSize()); + assert(docB.state.size() == docA.state.size()); + } + + // now B inserts b + { + std::string_view new_text{"iiibiiii"}; + const auto ops = docB.merge(new_text); + assert(docB.getText() == new_text); + assert(ops.size() == 1); // 1 new inserted char, nothing to delete + + assert(docA.apply(ops)); + + assert(docA.getText() == new_text); + } +} + +void testPaste1(void) { + Doc docA; + docA.local_actor = 'A'; + + { + std::string_view new_text{"iiiiiii"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 7); + assert(docA.getText() == new_text); + } + + { + std::string_view new_text{"iiiiiii\n"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 1); + assert(docA.getText() == new_text); + } + + { + std::string_view new_text{"iiiiiii\niiiiiii"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 7); + assert(docA.getText() == new_text); + } +} + +void testPaste2(void) { + Doc docA; + docA.local_actor = 'A'; + + { + std::string_view new_text{"aiiiiib"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 7); + assert(docA.getText() == new_text); + } + + { + std::string_view new_text{"aiiiiib\n"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 1); + assert(docA.getText() == new_text); + } + + { + std::string_view new_text{"aiiiiib\naiiiiib"}; + const auto ops = docA.merge(new_text); + assert(ops.size() == 7); + assert(docA.getText() == 
new_text); + } +} + +int main(void) { + const size_t loops = 1'000; + { + std::cout << "testEmptyDocAdds:\n"; + for (size_t i = 0; i < loops; i++) { + std::cout << "i " << i << "\n"; + testEmptyDocAdds(1337+i); + std::cout << std::string(40, '-') << "\n"; + } + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "test1CharDocAdds:\n"; + for (size_t i = 0; i < loops; i++) { + std::cout << "i " << i << "\n"; + test1CharDocAdds(1337+i); + std::cout << std::string(40, '-') << "\n"; + } + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "test1CharDocDels:\n"; + for (size_t i = 0; i < loops; i++) { + std::cout << "i " << i << "\n"; + test1CharDocDels(1337+i); + std::cout << std::string(40, '-') << "\n"; + } + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "test2CharDocAdds:\n"; + for (size_t i = 0; i < loops; i++) { + std::cout << "i " << i << "\n"; + test2CharDocAdds(1337+i); + std::cout << std::string(40, '-') << "\n"; + } + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testChange1:\n"; + for (size_t i = 0; i < loops; i++) { + std::cout << "i " << i << "\n"; + testChange1(1337+i); + std::cout << std::string(40, '-') << "\n"; + } + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testBugSame:\n"; + testBugSame(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testBugDoubleDel:\n"; + testBugDoubleDel(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testBugSameDel:\n"; + testBugSameDel(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testBugSameDel2:\n"; + testBugSameDel2(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testMulti1:\n"; + testMulti1(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testPaste1:\n"; + testPaste1(); + } + + std::cout << std::string(40, '=') << "\n"; + + { + std::cout << "testPaste2:\n"; + testPaste2(); + 
} + + return 0; +} + diff --git a/version4/CMakeLists.txt b/version4/CMakeLists.txt new file mode 100644 index 0000000..c1687e1 --- /dev/null +++ b/version4/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.9 FATAL_ERROR) + +project(crdt_version4 CXX C) + +add_library(crdt_version4 INTERFACE) + +target_compile_features(crdt_version4 INTERFACE cxx_std_17) + +target_include_directories(crdt_version4 INTERFACE "${PROJECT_SOURCE_DIR}") + +######################################## + +add_executable(v4_test1 + ./test1.cpp +) + +target_link_libraries(v4_test1 PUBLIC crdt_version4) + +######################################## + +#add_executable(v4_test2 + #./test2.cpp +#) + +#target_link_libraries(v4_test2 PUBLIC crdt_version4) + diff --git a/version4/green_crdt/v4/list.hpp b/version4/green_crdt/v4/list.hpp new file mode 100644 index 0000000..07ee073 --- /dev/null +++ b/version4/green_crdt/v4/list.hpp @@ -0,0 +1,381 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#if !defined(extra_assert) + #if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1 + #define extra_assert(...) assert(__VA_ARGS__) + #else + #define extra_assert(...) 
void(0) + #endif +#endif + +namespace GreenCRDT::V4 { + +template +struct List { + // for public interface + struct ListID { + ActorType id; + uint64_t seq{0}; // strictly increasing for that actor + + bool operator<(const ListID& rhs) const { + if (seq < rhs.seq) { + return true; + } else if (seq > rhs.seq) { + return false; + } else { // == + return id < rhs.id; + } + } + + bool operator==(const ListID& rhs) const { + return seq == rhs.seq && id == rhs.id; + } + + bool operator!=(const ListID& rhs) const { + return seq != rhs.seq || id != rhs.id; + } + }; + + struct ListIDInternal { + size_t actor_idx{0}; + uint64_t seq{0}; // strictly increasing for that actor + + bool operator==(const ListIDInternal& rhs) const { + return seq == rhs.seq && actor_idx == rhs.actor_idx; + } + }; + + // internally the index into this array is used to refer to an actor + std::vector _actors; + + // range + struct Entry { + ListIDInternal id; + + std::vector values; + + bool deleted {false}; + + // Yjs + std::optional parent_left; + std::optional parent_right; + }; + + // TODO: use something better, edit: this seems fine + std::vector _list; + + // number of not deleted entries + size_t _doc_size {0}; + + // TODO: actor index instead of map + std::unordered_map _last_seen_seq; + + // caching only, contains the last index an actor inserted at + std::unordered_map _last_inserted_idx; + + //size_t _stat_find_with_hint{0}; + //size_t _stat_find_with_hint_hit{0}; + + std::optional findActor(const ActorType& actor) const { + for (size_t i = 0; i < _actors.size(); i++) { + if (_actors[i] == actor) { + return i; + } + } + return std::nullopt; + } + + std::optional findIdx(const ListIDInternal& list_id) const { + extra_assert(verify()); + + for (size_t i = 0; i < _list.size(); i++) { + if ( + _list[i].id.actor_idx == list_id.actor_idx && // same actor (compare the index, not the whole id) + list_id.seq >= _list[i].id.seq && // in range seen from left + list_id.seq < _list[i].id.seq + _list[i].values.size() // in range seen from right + ) { 
+ return i; + } + } + + return std::nullopt; + } + + // search close to hint first + std::optional findIdx(const ListIDInternal& list_id, size_t hint) const { + extra_assert(verify()); + + //_stat_find_with_hint++; + + // TODO: find NEW magic values + static constexpr size_t c_hint_pre = 1; + static constexpr size_t c_hint_post = 4; + + if (hint >= c_hint_pre) { + hint -= c_hint_pre; + } + + const size_t max_at_hint = hint + c_hint_post; // how many positions we check at hint, before falling back to full lookup + + for (size_t i = hint; i <= max_at_hint && i < _list.size(); i++) { + if (_list[i].id == list_id) { + //_stat_find_with_hint_hit++; + return i; + } + } + + // fall back to normal search + return findIdx(list_id); + } + + std::optional findIdx(const ListID& list_id) const { + extra_assert(verify()); + + const auto actor_idx_opt = findActor(list_id.id); + if (!actor_idx_opt.has_value()) { + return std::nullopt; + } + + const ListIDInternal tmp_id {actor_idx_opt.value(), list_id.seq}; + + return findIdx(tmp_id); + } + + std::optional findIdx(const ListID& list_id, size_t hint) const { + extra_assert(verify()); + + const auto actor_idx_opt = findActor(list_id.id); + if (!actor_idx_opt.has_value()) { + return std::nullopt; + } + + const ListIDInternal tmp_id {actor_idx_opt.value(), list_id.seq}; + + return findIdx(tmp_id, hint); + } + + // returns false if missing OPs + // based on YjsMod https://github.com/josephg/reference-crdts/blob/9f4f9c3a97b497e2df8ae4473d1e521d3c3bf2d2/crdts.ts#L293-L348 + // which is a modified Yjs(YATA) algo + bool add(const ListID& list_id, const ValueType& value, const std::optional& parent_left, const std::optional& parent_right) { + extra_assert(verify()); + + size_t actor_idx {0}; + { // new actor? 
+ // add, even if op fails + const auto actor_opt = findActor(list_id.id); + if (!actor_opt.has_value()) { + actor_idx = _actors.size(); + _last_inserted_idx[_actors.size()] = 0; // hack + _actors.push_back(list_id.id); + } else { + actor_idx = actor_opt.value(); + } + } + + // check actor op order + if (!_last_seen_seq.count(actor_idx)) { + // we dont know this actor yet, first seq needs to be 0 + if (list_id.seq != 0) { + return false; + } + } else { + // making sure we dont skip operations by that actor + if (list_id.seq != _last_seen_seq.at(actor_idx) + 1) { + return false; + } + } + + size_t insert_idx = 0; + if (_list.empty()) { + if (parent_left.has_value() || parent_right.has_value()) { + // empty, missing parents + return false; + } + } else { + // find left + std::optional left_idx_opt = std::nullopt; + if (parent_left.has_value()) { + left_idx_opt = findIdx(parent_left.value(), _last_inserted_idx[actor_idx]); + if (!left_idx_opt.has_value()) { + // missing parent left + return false; + } + + // we insert before the it, so we need to go past the left parent + insert_idx = left_idx_opt.value() + 1; + } // else insert_idx = 0 + const size_t left_idx_hint = insert_idx; + + // find right + size_t right_idx = _list.size(); + if (parent_right.has_value()) { + auto tmp_right = findIdx(parent_right.value(), left_idx_hint); + if (!tmp_right.has_value()) { + return false; + } + right_idx = tmp_right.value(); + } + + bool scanning {false}; + + for(size_t i = insert_idx;; i++) { + if (!scanning) { + insert_idx = i; + } + // if right parent / end of doc, insert + if (insert_idx == right_idx) { + break; + } + // we ran past right o.o ? 
+ if (insert_idx == _list.size()) { + break; + } + + const Entry& at_i = _list[i]; + // parents left and right + std::optional i_left_idx {std::nullopt}; + if (at_i.parent_left.has_value()) { + i_left_idx = findIdx(at_i.parent_left.value(), left_idx_hint); + if (!i_left_idx.has_value()) { + assert(false && "item in list with unknown parent left!!"); + return false; + } + } + + // possibility map + // + // | ir < r | ir == r | ir > r + // ------------------------------------- + // il < l | insert | insert | insert + // il == l | ? | agentfallback | ? + // il > l | skip | skip | skip + + if (i_left_idx < left_idx_opt) { + break; + } else if (i_left_idx == left_idx_opt) { + // get i parent_right + size_t i_right_idx = _list.size(); + if (at_i.parent_right.has_value()) { + auto tmp_right = findIdx(at_i.parent_right.value(), insert_idx); + if (!tmp_right.has_value()) { + assert(false && "item in list with unknown parent right!!"); + return false; + } + i_right_idx = tmp_right.value(); + } + + if (i_right_idx < right_idx) { + scanning = true; + } else if (i_right_idx == right_idx) { + // actor id tie breaker + if (_actors[actor_idx] < _actors[at_i.id.actor_idx]) { + break; + } else { + scanning = false; + } + } else { // i_right_idx > right_idx + scanning = false; + } + } else { // il > l + // do nothing + } + } + } + + { // actual insert + Entry new_entry; + + new_entry.id.actor_idx = actor_idx; + new_entry.id.seq = list_id.seq; + + if (parent_left.has_value()) { + new_entry.parent_left = ListIDInternal{findActor(parent_left.value().id).value(), parent_left.value().seq}; + } + + if (parent_right.has_value()) { + new_entry.parent_right = ListIDInternal{findActor(parent_right.value().id).value(), parent_right.value().seq}; + } + + new_entry.value = value; + + _list.emplace(_list.begin() + insert_idx, new_entry); + _last_inserted_idx[actor_idx] = insert_idx; + } + + _doc_size++; + _last_seen_seq[actor_idx] = list_id.seq; + + extra_assert(verify()); + return true; + } + + 
// marks the entry with the given id as deleted by clearing its value (the entry itself stays in _list); returns false if not found or already deleted
+	bool del(const ListID& id) {
+		extra_assert(verify());
+
+		auto actor_idx_opt = findActor(id.id);
+		if (!actor_idx_opt.has_value()) {
+			// we don't have anything with that actor
+			return false;
+		}
+		// build the internal (actor index, seq) form of the id so it can be compared with the stored entries
+		const ListIDInternal tmp_id {actor_idx_opt.value(), id.seq};
+		// linear scan for the entry carrying that id
+		for (auto& it : _list) {
+			if (it.id == tmp_id) {
+				if (it.value.has_value()) {
+					it.value.reset();
+					// one entry less that still holds a value
+					_doc_size--;
+					extra_assert(verify());
+					return true;
+				} else {
+					extra_assert(verify());
+					return false; // TODO: allow double deletes?,,,, need ids
+				}
+			}
+		}
+		// the actor is known, but no entry has that id
+		extra_assert(verify());
+		return false;
+	}
+	// returns the number of entries that still hold a value (deleted entries are not counted)
+	size_t getDocSize(void) const {
+		return _doc_size;
+	}
+	// returns the values of all entries that still hold one, in list order
+	std::vector getArray(void) const {
+		std::vector array;
+		for (const auto& e : _list) {
+			if (e.value.has_value()) {
+				array.push_back(e.value.value());
+			}
+		}
+
+		return array;
+	}
+
+	// consistency check: recounts the entries that hold a value and compares the count with _doc_size. TODO: only in debug?
+	bool verify(void) const {
+		size_t actual_size = 0;
+		for (const auto& it : _list) {
+			if (it.value.has_value()) {
+				actual_size++;
+			}
+		}
+		//assert(doc_size == actual_size);
+		return _doc_size == actual_size;
+	}
+};
+
+} // GreenCRDT::V1 -- NOTE(review): the test file added below uses GreenCRDT::V4::List, so "V1" here looks stale; confirm against the namespace opening
+
diff --git a/version4/test1.cpp b/version4/test1.cpp
new file mode 100644
index 0000000..b663e42
--- /dev/null
+++ b/version4/test1.cpp
@@ -0,0 +1,214 @@
+#define EXTRA_ASSERTS 1 // presumably switches on the extra_assert(verify()) checks inside the list header -- confirm
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+// single letter actor, for testing only
+using Actor = char;
+using ListType = GreenCRDT::V4::List;
+// element-wise comparison so the tests can write getArray() == "text". NOTE(review): adding an overload to namespace std is undefined behavior; a helper in the global namespace would avoid that
+namespace std {
+bool operator==(const std::vector& lhs, const std::string_view& rhs) {
+	if (lhs.size() != rhs.size()) {
+		return false;
+	}
+	// same length, so compare element by element
+	for (size_t i = 0; i < rhs.size(); i++) {
+		if (lhs[i] != rhs[i]) {
+			return false;
+		}
+	}
+
+	return true;
+}
+} // namespace std
+// a single actor appends 'a' and then 'b' after it; expects "ab". NOTE(review): the add() calls sit inside assert() and are compiled out when NDEBUG is defined
+void testSingle1(void) {
+	ListType list;
+
+	assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+	assert(list.add({'A', 1}, 'b', ListType::ListID{'A', 0u}, std::nullopt));
+
+	assert(list.getArray() == "ab");
+}
+
+
+void testConcurrent1(void) { // two actors concurrently insert the very first element; both arrival orders must give "ab"
+	// agent_a < agent_b, so the actor id tie breaker in add() places A's element before B's
+	// NOTE(review): the list.add() calls sit inside assert(), so they are compiled out when NDEBUG is defined
+	// concurrent insert of first element
+	{ // variant 1, a then b
+		ListType list;
+		assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+		assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
+
+		assert(list.getArray() == "ab");
+	}
+	{ // variant 2, b then a
+		ListType list;
+		assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
+		assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
+
+		assert(list.getArray() == "ab");
+	}
+}
+// one add operation, holding exactly the arguments the tests pass to ListType::add(): id, value and the optional left/right parent ids
+struct AddOp {
+	ListType::ListID id;
+	char value;
+	std::optional parent_left;
+	std::optional parent_right;
+};
+// applies ops to a fresh list in 1000 pseudo-random orders (seed 1337 + i) and asserts that every order produces expected
+void randomAddPermutations(const std::vector& ops, const std::string& expected) {
+	// TODO: more than 1k?
+	for (size_t i = 0; i < 1000; i++) {
+		std::minstd_rand rng(1337 + i);
+		std::vector ops_todo(ops.size());
+		std::iota(ops_todo.begin(), ops_todo.end(), 0u);
+		// ops_todo now holds the indices 0..ops.size()-1 of the ops that still have to be applied
+		size_t attempts {0};
+
+		ListType list;
+		do {
+			size_t idx = rng() % ops_todo.size(); // random pending op; NOTE(review): modulo by zero if ops is empty, since the body runs before the emptiness check
+			// add() returns false while the op cannot be applied yet (e.g. an earlier seq of that actor or a parent is missing); the op then stays queued
+			if (list.add(ops[ops_todo[idx]].id, ops[ops_todo[idx]].value, ops[ops_todo[idx]].parent_left, ops[ops_todo[idx]].parent_right)) {
+				// only remove if it was possible -> returned true;
+				ops_todo.erase(ops_todo.begin()+idx);
+			}
+
+			attempts++;
+			assert(attempts < 10'000); // in case we run into an endless loop
+		} while (!ops_todo.empty());
+
+		assert(list.getArray() == expected);
+	}
+}
+// actors A and B each append three characters, every op using the actor's previous op as left parent; the two runs must not interleave
+void testInterleave1(void) {
+	const std::vector ops {
+		{{'A', 0u}, 'a', std::nullopt, std::nullopt},
+		{{'A', 1u}, 'a', ListType::ListID{'A', 0u}, std::nullopt},
+		{{'A', 2u}, 'a', ListType::ListID{'A', 1u}, std::nullopt},
+		{{'B', 0u}, 'b', std::nullopt, std::nullopt},
+		{{'B', 1u}, 'b', ListType::ListID{'B', 0u}, std::nullopt},
+		{{'B', 2u}, 'b', ListType::ListID{'B', 1u}, std::nullopt},
+	};
+
+	randomAddPermutations(ops, "aaabbb");
+}
+// actors A and B each prepend three characters, every op using the actor's previous op as right parent; the two runs must not interleave
+void testInterleave2(void) {
+	const std::vector ops {
+		{{'A', 0u}, 'a', std::nullopt, std::nullopt},
+		{{'A', 1u}, 'a', std::nullopt, ListType::ListID{'A', 0u}},
+		{{'A', 2u}, 'a', std::nullopt, ListType::ListID{'A', 1u}},
+		{{'B', 0u}, 'b', std::nullopt, std::nullopt},
+		{{'B', 1u}, 'b', std::nullopt, ListType::ListID{'B', 0u}},
+		{{'B', 2u}, 'b', std::nullopt, ListType::ListID{'B', 1u}},
+	};
+
+	randomAddPermutations(ops, "aaabbb");
+}
+// A, C and B each insert one character without parents; D inserts 'd' with 'a' as left and 'c' as right parent; every arrival order must give "adbc"
+void testConcurrent2(void) {
+	const std::vector ops {
+		{{'A', 0u}, 'a', std::nullopt, std::nullopt},
+		{{'C', 0u}, 'c', std::nullopt, std::nullopt},
+		{{'B', 0u}, 'b', std::nullopt, std::nullopt},
+		{{'D', 0u}, 'd', ListType::ListID{'A', 0u}, ListType::ListID{'C', 0u}},
+	};
+
+	randomAddPermutations(ops, "adbc");
+}
+// step-by-step scenario with actors '0' and '1': checks the text after every add and finally prints the sizes and the text
+void testMain1(void) {
+	ListType list;
+	// the expected results below rely on actor '0' ordering before actor '1'
+	static_assert('0' < '1');
+
+	const std::vector a0_ops {
+		{{'0', 0u}, 'a', std::nullopt, std::nullopt},
+		{{'0', 1u}, 'b', ListType::ListID{'0', 0u}, std::nullopt},
+		{{'0', 2u}, 'c', ListType::ListID{'0', 1u}, std::nullopt},
+		{{'0', 3u}, 'd', ListType::ListID{'0', 1u}, ListType::ListID{'0', 2u}},
+	};
+
+	const std::vector a1_ops {
+		// knows of a0 up to {a0, 1}
+		{{'1', 0u}, 'z', ListType::ListID{'0', 0u}, ListType::ListID{'0', 1u}},
+		{{'1', 1u}, 'y', ListType::ListID{'0', 1u}, std::nullopt},
+	};
+
+	{ // the ez, in order stuff
+		// a0 insert first char, 'a', since it's the first, we don't have any parents
+		assert(list.add(a0_ops[0].id, a0_ops[0].value, a0_ops[0].parent_left, a0_ops[0].parent_right));
+		assert(list.getArray() == "a");
+
+		// a0 insert second char, 'b' after 'a', no parents to right
+		assert(list.add(a0_ops[1].id, a0_ops[1].value, a0_ops[1].parent_left, a0_ops[1].parent_right));
+		assert(list.getArray() == "ab");
+
+		// a0 insert 'c' after 'b', no parents to right
+		assert(list.add(a0_ops[2].id, a0_ops[2].value, a0_ops[2].parent_left, a0_ops[2].parent_right));
+		assert(list.getArray() == "abc");
+
+		// a0 insert 'd' after 'b', 'c' parent right
+		assert(list.add(a0_ops[3].id, a0_ops[3].value, a0_ops[3].parent_left, a0_ops[3].parent_right));
+		assert(list.getArray() == "abdc");
+
+		// a1 insert 'z' after 'a', 'b' parent right
+		assert(list.add(a1_ops[0].id, a1_ops[0].value, a1_ops[0].parent_left, a1_ops[0].parent_right));
+		assert(list.getArray() == "azbdc");
+	}
+
+	std::cout << "done with ez\n";
+
+	{ // a1 was not up to date, only had 0,1 of a0
+		// a1 insert 'y' after 'b', no parent right
+		assert(list.add(a1_ops[1].id, a1_ops[1].value, a1_ops[1].parent_left, a1_ops[1].parent_right));
+		assert(list.getArray() == "azbdcy");
+	}
+
+	std::cout << "\ndoc size (with tombstones): " << list._list.size() << "\n";
+	std::cout << "\ndoc size: " << list.getDocSize() << "\n";
+	std::cout << "doc text:\n";
+
+	const auto tmp_array = list.getArray();
+	std::cout << std::string_view(tmp_array.data(), tmp_array.size()) << "\n";
+}
+// runs every test in sequence, printing the test's name before it and a line of 40 dashes after it
+int main(void) {
+	std::cout << "testSingle1:\n";
+	testSingle1();
+	std::cout << std::string(40, '-') << "\n";
+
+	std::cout << "testConcurrent1:\n";
+	testConcurrent1();
+	std::cout << std::string(40, '-') << "\n";
+
+	std::cout << "testInterleave1:\n";
+	testInterleave1();
+	std::cout << std::string(40, '-') << "\n";
+
+	std::cout << "testInterleave2:\n";
+	testInterleave2();
+	std::cout << std::string(40, '-') << "\n";
+
+	std::cout << "testConcurrent2:\n";
+	testConcurrent2();
+	std::cout << std::string(40, '-') << "\n";
+
+	std::cout << "testMain1:\n";
+	testMain1();
+	std::cout << std::string(40, '-') << "\n";
+
+	return 0;
+}
+