add text document to v3, planning v4

This commit is contained in:
Green Sky 2022-12-23 03:16:38 +01:00
parent 931436dc11
commit d85a2dc191
No known key found for this signature in database
8 changed files with 1659 additions and 11 deletions

View File

@ -41,6 +41,7 @@ add_subdirectory(./version0)
add_subdirectory(./version1) add_subdirectory(./version1)
add_subdirectory(./version2) add_subdirectory(./version2)
add_subdirectory(./version3) add_subdirectory(./version3)
#add_subdirectory(./version4)
add_subdirectory(./bench) add_subdirectory(./bench)

View File

@ -18,9 +18,9 @@ target_link_libraries(v3_test1 PUBLIC crdt_version3)
######################################## ########################################
#add_executable(v3_test2 add_executable(v3_test2
#./test2.cpp ./test2.cpp
#) )
#target_link_libraries(v3_test2 PUBLIC crdt_version3) target_link_libraries(v3_test2 PUBLIC crdt_version3)

View File

@ -80,7 +80,7 @@ struct List {
//size_t _stat_find_with_hint{0}; //size_t _stat_find_with_hint{0};
//size_t _stat_find_with_hint_hit{0}; //size_t _stat_find_with_hint_hit{0};
std::optional<size_t> findActor(const ActorType& actor) const { [[nodiscard]] std::optional<size_t> findActor(const ActorType& actor) const {
for (size_t i = 0; i < _actors.size(); i++) { for (size_t i = 0; i < _actors.size(); i++) {
if (_actors[i] == actor) { if (_actors[i] == actor) {
return i; return i;
@ -89,7 +89,7 @@ struct List {
return std::nullopt; return std::nullopt;
} }
std::optional<size_t> findIdx(const ListIDInternal& list_id) const { [[nodiscard]] std::optional<size_t> findIdx(const ListIDInternal& list_id) const {
extra_assert(verify()); extra_assert(verify());
for (size_t i = 0; i < _list_ids.size(); i++) { for (size_t i = 0; i < _list_ids.size(); i++) {
@ -102,7 +102,7 @@ struct List {
} }
// search close to hint first // search close to hint first
std::optional<size_t> findIdx(const ListIDInternal& list_id, size_t hint) const { [[nodiscard]] std::optional<size_t> findIdx(const ListIDInternal& list_id, size_t hint) const {
extra_assert(verify()); extra_assert(verify());
//_stat_find_with_hint++; //_stat_find_with_hint++;
@ -140,7 +140,7 @@ struct List {
return findIdx(list_id); return findIdx(list_id);
} }
std::optional<size_t> findIdx(const ListID& list_id) const { [[nodiscard]] std::optional<size_t> findIdx(const ListID& list_id) const {
extra_assert(verify()); extra_assert(verify());
const auto actor_idx_opt = findActor(list_id.id); const auto actor_idx_opt = findActor(list_id.id);
@ -153,7 +153,7 @@ struct List {
return findIdx(tmp_id); return findIdx(tmp_id);
} }
std::optional<size_t> findIdx(const ListID& list_id, size_t hint) const { [[nodiscard]] std::optional<size_t> findIdx(const ListID& list_id, size_t hint) const {
extra_assert(verify()); extra_assert(verify());
const auto actor_idx_opt = findActor(list_id.id); const auto actor_idx_opt = findActor(list_id.id);
@ -369,12 +369,32 @@ struct List {
return false; return false;
} }
// true when _list_ids holds no elements at all
[[nodiscard]] bool empty(void) const {
	return _list_ids.size() == 0;
}
// number of elements in _list_ids
// (see getDocSize() for the count of alive entries)
[[nodiscard]] size_t size(void) const {
return _list_ids.size();
}
// returns a copy of the internal id stored at position idx
// the lookup goes through at(), so idx is bounds-checked by the container
[[nodiscard]] ListIDInternal getIDInternal(size_t idx) const {
return _list_ids.at(idx);
}
// builds the public ListID for the entry at position idx:
// the actor index stored internally is translated back to the actor itself,
// seq is taken over unchanged
// both lookups go through at()
[[nodiscard]] const ListID getID(size_t idx) const {
	const auto& internal_id = _list_ids.at(idx);
	return {_actors.at(internal_id.actor_idx), internal_id.seq};
}
// returns a reference to the (optional) value stored at position idx
// callers in this code base treat an entry without a value as deleted
// the lookup goes through at(), so idx is bounds-checked by the container
[[nodiscard]] const std::optional<ValueType>& getValue(size_t idx) const {
return _list_data.at(idx).value;
}
// returns the size of alive entries // returns the size of alive entries
size_t getDocSize(void) const { [[nodiscard]] size_t getDocSize(void) const {
return _doc_size; return _doc_size;
} }
std::vector<ValueType> getArray(void) const { [[nodiscard]] std::vector<ValueType> getArray(void) const {
std::vector<ValueType> array; std::vector<ValueType> array;
for (const auto& e : _list_data) { for (const auto& e : _list_data) {
if (e.value.has_value()) { if (e.value.has_value()) {

View File

@ -0,0 +1,305 @@
#pragma once
#include "./list.hpp"
#include <variant>
//#include <iostream> // debug
namespace GreenCRDT::V3 {
template<typename ActorType>
struct TextDocument {
// TODO: determine if char is the best
using ListType = List<char, ActorType>;
// operation: insert a single character
struct OpAdd {
// id of the newly inserted character
typename ListType::ListID id;
// ids of the neighbours the character was inserted between
// nullopt is used when there is no neighbour on that side
std::optional<typename ListType::ListID> parent_left;
std::optional<typename ListType::ListID> parent_right;
// the inserted character
char value;
};
// operation: delete a single character
struct OpDel {
// id of the character to delete
typename ListType::ListID id;
};
using Op = std::variant<OpAdd, OpDel>;
//// TODO: implement
//struct Cursor {
//AgentType who;
//typename ListType::ListID pos;
//};
ActorType local_actor;
ListType state;
// concatenates the values of all entries that still hold one, in list order
// entries without a value are skipped
[[nodiscard]] std::string getText(void) const {
	std::string result;

	for (const auto& entry : state._list_data) {
		if (!entry.value.has_value()) {
			continue;
		}

		result += *entry.value;
	}

	return result;
}
bool apply(const Op& op) {
if(std::holds_alternative<OpAdd>(op)) {
const auto& add_op = std::get<OpAdd>(op);
//std::cout << "a:" << add_op.id.id << " s:" << add_op.id.seq << " v:" << add_op.value << "\n";
return state.add(add_op.id, add_op.value, add_op.parent_left, add_op.parent_right);
} else if (std::holds_alternative<OpDel>(op)) {
const auto& del_op = std::get<OpDel>(op);
return state.del(del_op.id);
} else {
assert(false);
}
}
bool apply(const std::vector<Op>& ops) {
for (const auto& op : ops) {
if (!apply(op)) {
// this is not ideal, since we might have applyed some, and dont report which/howmany
return false;
}
}
return true;
}
static std::vector<Op> text2adds(
const ActorType& actor, uint64_t seq, // seq is the first seq
std::optional<typename ListType::ListID> parent_left,
std::optional<typename ListType::ListID> parent_right,
std::string_view text
) {
std::vector<Op> ops;
for (size_t i = 0; i < text.size(); i++) {
typename ListType::ListID new_id {actor, seq++};
ops.emplace_back(OpAdd{
new_id,
parent_left,
parent_right,
text[i]
});
parent_left = new_id;
}
return ops;
}
// adds text between the specified parents, using local_actor as author
// the generated ops are applied to the local state right away
// returns generated ops
std::vector<Op> addText(
	std::optional<typename ListType::ListID> parent_left,
	std::optional<typename ListType::ListID> parent_right,
	std::string_view text
) {
	// TODO: move actor setting to list
	// look the actor up once, register it if it is not known yet
	auto actor_idx_opt = state.findActor(local_actor);
	if (!actor_idx_opt.has_value()) {
		actor_idx_opt = state._actors.size(); // index the new actor is going to get
		state._actors.push_back(local_actor);
	}
	const size_t actor_idx = actor_idx_opt.value();

	// TODO: look up typesystem and fix (move? decltype?)
	std::vector<Op> ops = text2adds(
		// TODO: abstract actors
		// continue after the last seq seen for this actor, the very first seq is 0
		local_actor, state._last_seen_seq.count(actor_idx) ? state._last_seen_seq[actor_idx]+1u : 0u,
		parent_left,
		parent_right,
		text
	);

	// and apply
	// text2adds only generates OpAdd, so the generic apply() covers everything
	for (const auto& op : ops) {
		[[maybe_unused]] const bool applied = apply(op);
		assert(applied);
	}

	return ops; // TODO: move?
}
// deletes every entry that still holds a value in the index range [first, last)
// - left: id of the first entry to delete, nullopt means start of the list
// - right: id of the entry one past the last to delete, nullopt means end of the list
// an id that can not be found trips an assert and yields no ops
// returns generated ops
std::vector<Op> delRange(
	std::optional<typename ListType::ListID> left,
	std::optional<typename ListType::ListID> right
) {
	size_t first_idx = 0;
	if (left.has_value()) {
		const auto found = state.findIdx(left.value());
		if (!found.has_value()) {
			assert(false && "cant find left");
			return {};
		}

		first_idx = found.value();
	}

	size_t last_idx = state.size();
	if (right.has_value()) {
		const auto found = state.findIdx(right.value());
		if (!found.has_value()) {
			assert(false && "cant find right");
			return {};
		}

		last_idx = found.value();
	}

	std::vector<Op> generated;

	for (size_t idx = first_idx; idx < last_idx; idx++) {
		// skip entries that are already deleted
		if (state.getValue(idx).has_value()) {
			generated.emplace_back(OpDel{
				state.getID(idx)
			});

			// TODO: do deletes get a seq?????
			state.del(state.getID(idx));
		}
	}

	return generated;
}
// generates ops from the difference between the current document and text,
// applies them to the local state and returns them
// the common prefix and the common suffix are kept, everything in between is
// deleted and replaced
// note: rn it only creates 1 diff patch
std::vector<Op> merge(std::string_view text) {
	if (text.empty()) {
		if (state.empty() || state.getDocSize() == 0) {
			// no op
			return {};
		} else {
			// delete all
			return delRange(std::nullopt, std::nullopt);
		}
	}

	// text not empty
	if (state.empty()) {
		return addText(
			std::nullopt,
			std::nullopt,
			text
		);
	}

	// neither empty

	// find start and end of changes
	// start
	size_t list_start = 0;
	size_t list_start_counted = 0; // number of alive chars in the common prefix
	size_t text_start = 0;
	bool differ = false;
	for (; list_start < state.size() && text_start < text.size();) {
		// jump over tombstones
		if (!state.getValue(list_start).has_value()) {
			list_start++;
			continue;
		}

		if (state.getValue(list_start).value() != text[text_start]) {
			differ = true;
			break;
		}

		list_start++;
		text_start++;
		list_start_counted++;
	}

	// doc and text dont differ
	if (!differ && list_start == state.size() && text_start == text.size()) {
		return {};
	}

	// +1 so i can have unsigned
	// (list_end and text_end are one past the position that gets compared)
	size_t list_end = state.size();
	size_t text_end = text.size();
	size_t list_end_counted = 0; // number of alive chars in the common suffix
	differ = false; // var reuse
	while (state.getDocSize() - list_start_counted > list_end_counted && text_end >= text_start) {
		// jump over tombstones
		if (!state.getValue(list_end-1).has_value()) {
			list_end--;
			continue;
		}

		// text_end can only reach 0 here if text_start is 0 too: the whole text
		// got matched as suffix while the list still has alive chars in front
		// of it. there is nothing left to compare and text[text_end-1] would
		// read out of bounds, so stop here.
		if (text_end == 0) {
			differ = true;
			break;
		}

		if (state.getValue(list_end-1).value() != text[text_end-1]) {
			differ = true;
			break;
		}

		list_end--;
		text_end--;
		list_end_counted++;
	}

	if (!differ && text_start == text_end+1) {
		// we ran into eachother without seeing the different char
		// TODO: do we need to increment list_end? text_end?
		list_end++;
	}

	std::vector<Op> ops;

	// 1. clear range (del all list_start - list_end)
	if (list_start <= list_end && list_start < state.size()) {
		ops = delRange(
			state.getID(list_start),
			list_end < state.size() ? std::make_optional(state.getID(list_end)) : std::nullopt
		);
	}

	// 2. add range (add all text_start - text_end)
	// after the delete, the number of chars to insert is the difference in size
	if (state.getDocSize() < text.size()) {
		auto tmp_add_ops = addText(
			list_start == 0 ? std::nullopt : std::make_optional(state.getID(list_start-1)),
			list_start == state.size() ? std::nullopt :std::make_optional(state.getID(list_start)),
			text.substr(text_start, text.size() - state.getDocSize())
		);
		ops.insert(ops.end(), tmp_add_ops.begin(), tmp_add_ops.end());
	}

	return ops;
}
};
} // GreenCRDT::V3

701
version3/test2.cpp Normal file
View File

@ -0,0 +1,701 @@
#include <green_crdt/v3/text_document.hpp>
#include <numeric>
#include <optional>
#include <random>
#include <iostream>
#include <cassert>
#include <variant>
// single letter agent, for testing only
using Agent = std::string;
using Doc = GreenCRDT::V3::TextDocument<Agent>;
using Op = Doc::Op;
using ListType = Doc::ListType;
// maybe switch it up?
//using Rng = std::minstd_rand;
//using Rng = std::mt19937;
using Rng = std::ranlux24_base;
// 10*7 -> 70 permutations , ggwp
// | 1add | 1del | 1rep | 2add | 2del | 2rep | random add | random del | random rep | random
// empty doc | | 0 | 0 | | 0 | 0 | x | 0 | 0 |
// before 1 char | | | | | | | | | |
// after 1 char | | | | | | | | | |
// before 2 char | | | | | | | | | |
// in 2 char | | | | | | | | | |
// after 2 char | | | | | | | | | |
// random | | | | | | | | | |
static const std::vector<char> random_chars {
'a', 'b', 'c', 'd', 'e',
'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y',
'z',
'A', 'B', 'C', 'D', 'E',
'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y',
'Z',
};
// prints an optional list id as "<actor>-<seq>", or "null" if there is none
std::ostream& operator<<(std::ostream& out, const std::optional<ListType::ListID>& id) {
	if (!id.has_value()) {
		out << "null";
		return out;
	}

	out << id->id << "-" << id->seq;
	return out;
}
// prints an add op as "{ id:<actor>-<seq>, v:<value>, l:<left parent>, r:<right parent> }"
std::ostream& operator<<(std::ostream& out, const Doc::OpAdd& op) {
	out << "{ id:" << op.id.id << "-" << op.id.seq;
	out << ", v:" << op.value;
	out << ", l:" << op.parent_left;
	out << ", r:" << op.parent_right;
	out << " }";

	return out;
}
// genX() changes doc, uses local agent

// inserts one random char at a random position of doc and returns the op
// the op is applied to doc.state directly, a failing add trips an assert
Op genAdd(Rng& rng, Doc& doc) {
	Doc::OpAdd new_op {
		{doc.local_actor, 0u},
		std::nullopt,
		std::nullopt,
		random_chars[rng()%random_chars.size()]
	};

	auto& list = doc.state;

	// TODO: move to list
	// make sure actor index exists
	if (!list.findActor(doc.local_actor).has_value()) {
		list._actors.push_back(doc.local_actor);
	}
	const size_t actor_idx = list.findActor(doc.local_actor).value();

	// first id is 0, afterwards continue behind the last seen seq
	if (list._last_seen_seq.count(actor_idx)) {
		new_op.id.seq = list._last_seen_seq[actor_idx] + 1;
	}

	// gen parents, for the first char both stay nullopt
	if (!list.empty()) {
		const size_t entry_count = list.size();

		// entry_count itself stands for "no left parent"
		const size_t left_idx = rng()%(1+entry_count);

		if (left_idx == entry_count) {
			// left is before first, so right is first
			new_op.parent_right = list.getID(0);
		} else {
			new_op.parent_left = list.getID(left_idx);

			if (left_idx + 1 != entry_count) { // left is not last
				new_op.parent_right = list.getID(left_idx+1);
			}
		}
	}

	const bool added = list.add(new_op.id, new_op.value, new_op.parent_left, new_op.parent_right);
	if (!added) {
		std::cout << "op: " << new_op << "\n";
	}
	assert(added);

	return new_op;
}
// deletes one random alive entry of doc and returns the op
// starts at a random index and walks forward (wrapping around) until an entry
// with a value is found
// must not be called on a doc without alive entries
Op genDel(Rng& rng, Doc& doc) {
	auto& list = doc.state;

	if (list.getDocSize() == 0) {
		assert(false && "empty doc");
		return {}; // empty
	}

	list.verify();

	Doc::OpDel op{};

	// search for undeleted entry
	const size_t entry_count = list.size();
	size_t idx = rng()%entry_count;
	bool found = false;
	for (size_t probes_left = entry_count + 1; probes_left > 0 && !found; probes_left--) {
		if (list.getValue(idx).has_value()) {
			op.id = list.getID(idx);
			found = true;
		} else {
			idx = (idx+1) % entry_count;
		}
	}
	assert(found);

	{
		auto alive_before = list.getDocSize();
		bool deleted = list.del(op.id);
		assert(deleted);
		assert(alive_before-1 == list.getDocSize());
		assert(list.verify());
	}

	return op;
}
//genRep()
//genAddContRange()
//genDelContRange()
//genRepContRange()
//genRand()
//genRandRanges()
// picks one of the generators at random and returns its ops
// rn there is only a single choice (modulo 1), so this always ends up in genAdd
std::vector<Op> genRandAll(Rng& rng, Doc& doc) {
	const auto choice = rng() % 1;

	if (choice == 0) {
		return {genAdd(rng, doc)};
	}

	return {};
}
void testEmptyDocAdds(size_t seed) {
Rng rng(seed);
Doc doc; // empty
doc.local_actor = 'A';
std::string changed_text;
{
// for modifying
Doc doctmp = doc;
const size_t loop_count = (rng() % 55)+1;
for (size_t i = 0; i < loop_count; i++) {
genAdd(rng, doctmp);
}
changed_text = doctmp.getText();
}
assert(doc.getText() != changed_text);
std::cout << "changed_text: " << changed_text << "\n";
Doc otherdoc = doc;
assert(doc.getText().size() == doc.state.getDocSize());
const auto merge_ops = doc.merge(changed_text);
assert(doc.getText().size() == doc.state.getDocSize());
assert(doc.getText() == changed_text);
assert(otherdoc.apply(merge_ops));
assert(doc.getText() == otherdoc.getText());
}
void test1CharDocAdds(size_t seed) {
Rng rng(seed);
Doc doc;
doc.local_actor = 'A';
doc.addText(std::nullopt, std::nullopt, "0");
assert(doc.getText() == "0");
std::string changed_text;
{
// for modifying
Doc doctmp = doc;
const size_t loop_count = (rng() % 4)+1;
for (size_t i = 0; i < loop_count; i++) {
genAdd(rng, doctmp);
}
changed_text = doctmp.getText();
}
assert(doc.getText() != changed_text);
std::cout << "text: " << doc.getText() << "\n";
std::cout << "changed_text: " << changed_text << "\n";
Doc otherdoc = doc;
assert(doc.getText().size() == doc.state.getDocSize());
const auto merge_ops = doc.merge(changed_text);
assert(doc.getText().size() == doc.state.getDocSize());
std::cout << "text after merge: " << doc.getText() << "\n";
assert(doc.getText() == changed_text);
assert(otherdoc.apply(merge_ops));
assert(doc.getText() == otherdoc.getText());
}
void test1CharDocDels(size_t seed) {
Rng rng(seed);
Doc doc;
doc.local_actor = 'A';
assert(doc.getText().size() == doc.state.getDocSize());
doc.addText(std::nullopt, std::nullopt, "0123");
assert(doc.getText().size() == doc.state.getDocSize());
assert(doc.getText() == "0123");
std::string changed_text;
{
// for modifying
Doc doctmp = doc;
const size_t loop_count = (rng() % 4)+1;
std::cout << "going to delete: " << loop_count << "\n";
for (size_t i = 0; i < loop_count; i++) {
genDel(rng, doctmp);
}
changed_text = doctmp.getText();
assert(doctmp.getText().size() == doctmp.state.getDocSize());
if (loop_count == doc.state.getDocSize()) {
assert(doctmp.state.getDocSize() == 0);
assert(changed_text.size() == 0);
}
}
assert(doc.getText() != changed_text);
std::cout << "text: " << doc.getText() << "\n";
std::cout << "changed_text: " << changed_text << "\n";
Doc otherdoc = doc;
assert(doc.getText().size() == doc.state.getDocSize());
const auto merge_ops = doc.merge(changed_text);
assert(doc.getText().size() == doc.state.getDocSize());
std::cout << "text after merge: " << doc.getText() << "\n";
assert(doc.getText() == changed_text);
assert(otherdoc.apply(merge_ops));
assert(doc.getText() == otherdoc.getText());
}
void test2CharDocAdds(size_t seed) {
Rng rng(seed);
Doc doc;
doc.local_actor = 'A';
assert(doc.getText().size() == doc.state.getDocSize());
doc.addText(std::nullopt, std::nullopt, "012345");
assert(doc.getText().size() == doc.state.getDocSize());
assert(doc.getText() == "012345");
std::string changed_text;
{
// for modifying
Doc doctmp = doc;
const size_t loop_count = (rng() % 6)+1;
for (size_t i = 0; i < loop_count; i++) {
genAdd(rng, doctmp);
}
changed_text = doctmp.getText();
}
assert(doc.getText() != changed_text);
std::cout << "text: " << doc.getText() << "\n";
std::cout << "changed_text: " << changed_text << "\n";
Doc otherdoc = doc;
assert(doc.getText().size() == doc.state.getDocSize());
const auto merge_ops = doc.merge(changed_text);
assert(doc.getText().size() == doc.state.getDocSize());
std::cout << "text after merge: " << doc.getText() << "\n";
assert(doc.getText() == changed_text);
assert(otherdoc.apply(merge_ops));
assert(doc.getText() == otherdoc.getText());
}
void testChange1(size_t seed) {
Rng rng(seed);
Doc doc;
doc.local_actor = 'A';
assert(doc.getText().size() == doc.state.getDocSize());
doc.addText(std::nullopt, std::nullopt, "012345");
assert(doc.getText().size() == doc.state.getDocSize());
assert(doc.getText() == "012345");
std::string changed_text;
{
// for modifying
Doc doctmp = doc;
{ // dels
const size_t loop_count = (rng() % 6)+1;
for (size_t i = 0; i < loop_count; i++) {
genDel(rng, doctmp);
}
}
{ // adds
const size_t loop_count = (rng() % 6)+1;
for (size_t i = 0; i < loop_count; i++) {
genAdd(rng, doctmp);
}
}
changed_text = doctmp.getText();
}
assert(doc.getText() != changed_text);
std::cout << "text: " << doc.getText() << "\n";
std::cout << "changed_text: " << changed_text << "\n";
Doc otherdoc = doc;
assert(doc.getText().size() == doc.state.getDocSize());
const auto merge_ops = doc.merge(changed_text);
assert(doc.getText().size() == doc.state.getDocSize());
std::cout << "text after merge: " << doc.getText() << "\n";
assert(doc.getText() == changed_text);
assert(otherdoc.apply(merge_ops));
assert(doc.getText() == otherdoc.getText());
}
// regression: merging "a" and then "aa" (appending the same char again)
void testBugSame(void) {
	Doc doc;
	doc.local_actor = 'A';

	const std::string_view steps[] {"a", "aa"};
	for (const std::string_view new_text : steps) {
		doc.merge(new_text);
		assert(doc.getText() == new_text);
	}
}
// regression: "a" -> "" -> "" , the second merge to "" must not produce any op
void testBugDoubleDel(void) {
	Doc doc;
	doc.local_actor = 'A';

	const std::string_view single_char{"a"};
	const std::string_view no_text{""};

	{ // insert the single char
		const auto ops = doc.merge(single_char);
		assert(doc.getText() == single_char);
		assert(ops.size() == 1);
	}

	{ // delete it again, has to be exactly one del of seq 0
		const auto ops = doc.merge(no_text);
		assert(doc.getText() == no_text);
		assert(ops.size() == 1);
		assert(std::holds_alternative<Doc::OpDel>(ops.front()));
		assert(std::get<Doc::OpDel>(ops.front()).id.seq == 0);
	}

	{ // doc is already empty, nothing to do
		const auto ops = doc.merge(no_text);
		assert(doc.getText() == no_text);
		assert(ops.size() == 0);
	}
}
// regression: "a" -> "aa" -> "a", every step has to produce exactly one op
void testBugSameDel(void) {
	Doc doc;
	doc.local_actor = 'A';

	const std::string_view steps[] {"a", "aa", "a"};
	for (const std::string_view new_text : steps) {
		const auto ops = doc.merge(new_text);
		assert(doc.getText() == new_text);
		assert(ops.size() == 1);
	}
}
// regression: "a" -> "aa" -> "aaa" -> "aa" -> "a", every step has to produce
// exactly one op
void testBugSameDel2(void) {
	Doc doc;
	doc.local_actor = 'A';

	const std::string_view steps[] {"a", "aa", "aaa", "aa", "a"};
	for (const std::string_view new_text : steps) {
		const auto ops = doc.merge(new_text);
		assert(doc.getText() == new_text);
		assert(ops.size() == 1);
	}
}
// two docs with different actors:
// A creates the text and B applies A's ops, then B inserts a single char in
// the middle and A applies B's ops
void testMulti1(void) {
	Doc docA;
	docA.local_actor = 'A';

	Doc docB;
	docB.local_actor = 'B';

	// state A
	{
		const std::string_view text_a{"iiiiiii"};
		const auto ops_a = docA.merge(text_a);
		assert(docA.getText() == text_a);

		assert(docB.apply(ops_a));

		assert(docB.getText() == text_a);
		assert(docB.state.getDocSize() == docA.state.getDocSize());
		assert(docB.state.size() == docA.state.size());
	}

	// now B inserts b
	{
		const std::string_view text_b{"iiibiiii"};
		const auto ops_b = docB.merge(text_b);
		assert(docB.getText() == text_b);
		assert(ops_b.size() == 1); // 1 new inserted char, nothing to delete

		assert(docA.apply(ops_b));

		assert(docA.getText() == text_b);
	}
}
// appends to a doc made of the same char only: first a newline, then the
// initial line again, and checks the number of generated ops for each step
void testPaste1(void) {
	Doc docA;
	docA.local_actor = 'A';

	struct Step {
		std::string_view text;
		size_t expected_ops;
	};

	const Step steps[] {
		{"iiiiiii", 7},
		{"iiiiiii\n", 1},
		{"iiiiiii\niiiiiii", 7},
	};

	for (const Step& step : steps) {
		const auto ops = docA.merge(step.text);
		assert(ops.size() == step.expected_ops);
		assert(docA.getText() == step.text);
	}
}
// same as testPaste1, but the line starts and ends with a distinct char
void testPaste2(void) {
	Doc docA;
	docA.local_actor = 'A';

	struct Step {
		std::string_view text;
		size_t expected_ops;
	};

	const Step steps[] {
		{"aiiiiib", 7},
		{"aiiiiib\n", 1},
		{"aiiiiib\naiiiiib", 7},
	};

	for (const Step& step : steps) {
		const auto ops = docA.merge(step.text);
		assert(ops.size() == step.expected_ops);
		assert(docA.getText() == step.text);
	}
}
// runs all tests in a fixed order
// the randomized tests are run `loops` times each, with seeds 1337, 1338, ...
int main(void) {
	const size_t loops = 1'000;

	const std::string run_separator(40, '-');
	const std::string section_separator(40, '=');

	// header, then test_fn once per seed, each run followed by a separator
	const auto run_seeded = [&](const char* header, void (*test_fn)(size_t)) {
		std::cout << header;
		for (size_t i = 0; i < loops; i++) {
			std::cout << "i " << i << "\n";
			test_fn(1337+i);
			std::cout << run_separator << "\n";
		}
	};

	// header, then test_fn exactly once
	const auto run_once = [](const char* header, void (*test_fn)(void)) {
		std::cout << header;
		test_fn();
	};

	run_seeded("testEmptyDocAdds:\n", testEmptyDocAdds);
	std::cout << section_separator << "\n";

	run_seeded("test1CharDocAdds:\n", test1CharDocAdds);
	std::cout << section_separator << "\n";

	run_seeded("test1CharDocDels:\n", test1CharDocDels);
	std::cout << section_separator << "\n";

	run_seeded("test2CharDocAdds:\n", test2CharDocAdds);
	std::cout << section_separator << "\n";

	run_seeded("testChange1:\n", testChange1);
	std::cout << section_separator << "\n";

	run_once("testBugSame:\n", testBugSame);
	std::cout << section_separator << "\n";

	run_once("testBugDoubleDel:\n", testBugDoubleDel);
	std::cout << section_separator << "\n";

	run_once("testBugSameDel:\n", testBugSameDel);
	std::cout << section_separator << "\n";

	run_once("testBugSameDel2:\n", testBugSameDel2);
	std::cout << section_separator << "\n";

	run_once("testMulti1:\n", testMulti1);
	std::cout << section_separator << "\n";

	run_once("testPaste1:\n", testPaste1);
	std::cout << section_separator << "\n";

	run_once("testPaste2:\n", testPaste2);

	return 0;
}

26
version4/CMakeLists.txt Normal file
View File

@ -0,0 +1,26 @@
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
project(crdt_version4 CXX C)
add_library(crdt_version4 INTERFACE)
target_compile_features(crdt_version4 INTERFACE cxx_std_17)
target_include_directories(crdt_version4 INTERFACE "${PROJECT_SOURCE_DIR}")
########################################
add_executable(v4_test1
./test1.cpp
)
target_link_libraries(v4_test1 PUBLIC crdt_version4)
########################################
#add_executable(v4_test2
#./test2.cpp
#)
#target_link_libraries(v4_test2 PUBLIC crdt_version4)

View File

@ -0,0 +1,381 @@
#pragma once
#include <cstdint>
#include <optional>
#include <unordered_map>
#include <vector>
#include <string>
#include <cassert>
#if !defined(extra_assert)
#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
#define extra_assert(...) assert(__VA_ARGS__)
#else
#define extra_assert(...) void(0)
#endif
#endif
namespace GreenCRDT::V4 {
template<typename ValueType, typename ActorType>
struct List {
// for public interface
// public id of a list element: the actor that created it plus a per actor seq
struct ListID {
	ActorType id;
	uint64_t seq{0}; // strictly increasing for that actor

	// orders by seq first, the actor only breaks ties
	bool operator<(const ListID& rhs) const {
		if (seq != rhs.seq) {
			return seq < rhs.seq;
		}

		// same seq
		return id < rhs.id;
	}

	bool operator==(const ListID& rhs) const {
		return seq == rhs.seq && id == rhs.id;
	}

	bool operator!=(const ListID& rhs) const {
		return seq != rhs.seq || id != rhs.id;
	}
};
// id as stored inside the list, the actor is replaced by its index
struct ListIDInternal {
	size_t actor_idx{0};
	uint64_t seq{0}; // strictly increasing for that actor

	// equal only if both the actor index and the seq match
	bool operator==(const ListIDInternal& rhs) const {
		if (actor_idx != rhs.actor_idx) {
			return false;
		}

		return seq == rhs.seq;
	}
};
// internally the index into this array is used to refer to an actor
std::vector<ActorType> _actors;
// range
// one element of _list
struct Entry {
// id of the entry
// findIdx() treats the seqs id.seq .. id.seq + values.size() - 1 of the same
// actor as belonging to this entry
ListIDInternal id;
std::vector<ValueType> values;
// NOTE(review): presumably marks the entry as a tombstone — add() and del() in
// this file do not use the flag yet, confirm the intended semantics
bool deleted {false};
// Yjs
// ids of the neighbours at insertion time, nullopt if there was none
std::optional<ListIDInternal> parent_left;
std::optional<ListIDInternal> parent_right;
};
// TODO: use something better, edit: this seems fine
std::vector<Entry> _list;
// number of not deleted entries
size_t _doc_size {0};
// TODO: actor index instead of map
std::unordered_map<size_t, uint64_t> _last_seen_seq;
// caching only, contains the last index an actor inserted at
std::unordered_map<size_t, size_t> _last_inserted_idx;
//size_t _stat_find_with_hint{0};
//size_t _stat_find_with_hint_hit{0};
// linear search for actor in _actors
// returns the index of the first match, nullopt if the actor is not known
std::optional<size_t> findActor(const ActorType& actor) const {
	size_t idx = 0;

	for (const auto& known_actor : _actors) {
		if (known_actor == actor) {
			return idx;
		}

		idx++;
	}

	return std::nullopt;
}
std::optional<size_t> findIdx(const ListIDInternal& list_id) const {
extra_assert(verify());
for (size_t i = 0; i < _list.size(); i++) {
if (
_list[i].id.actor_idx == list_id && // same actor
list_id.seq >= _list[i].id.seq && // in range seen from left
list_id.seq < _list[i].id.seq + _list[i].values.size() // in range seen from right
) {
return i;
}
}
return std::nullopt;
}
// search close to hint first
std::optional<size_t> findIdx(const ListIDInternal& list_id, size_t hint) const {
extra_assert(verify());
//_stat_find_with_hint++;
// TODO: find NEW magic values
static constexpr size_t c_hint_pre = 1;
static constexpr size_t c_hint_post = 4;
if (hint >= c_hint_pre) {
hint -= c_hint_pre;
}
const size_t max_at_hint = hint + c_hint_post; // how many positions we check at hint, before falling back to full lookup
for (size_t i = hint; i <= max_at_hint && i < _list.size(); i++) {
if (_list[i].id == list_id) {
//_stat_find_with_hint_hit++;
return i;
}
}
// fall back to normal search
return findIdx(list_id);
}
std::optional<size_t> findIdx(const ListID& list_id) const {
extra_assert(verify());
const auto actor_idx_opt = findActor(list_id.id);
if (!actor_idx_opt.has_value()) {
return std::nullopt;
}
const ListIDInternal tmp_id {actor_idx_opt.value(), list_id.seq};
return findIdx(tmp_id);
}
std::optional<size_t> findIdx(const ListID& list_id, size_t hint) const {
extra_assert(verify());
const auto actor_idx_opt = findActor(list_id.id);
if (!actor_idx_opt.has_value()) {
return std::nullopt;
}
const ListIDInternal tmp_id {actor_idx_opt.value(), list_id.seq};
return findIdx(tmp_id, hint);
}
// returns false if missing OPs
// based on YjsMod https://github.com/josephg/reference-crdts/blob/9f4f9c3a97b497e2df8ae4473d1e521d3c3bf2d2/crdts.ts#L293-L348
// which is a modified Yjs(YATA) algo
bool add(const ListID& list_id, const ValueType& value, const std::optional<ListID>& parent_left, const std::optional<ListID>& parent_right) {
extra_assert(verify());
size_t actor_idx {0};
{ // new actor?
// add, even if op fails
const auto actor_opt = findActor(list_id.id);
if (!actor_opt.has_value()) {
actor_idx = _actors.size();
_last_inserted_idx[_actors.size()] = 0; // hack
_actors.push_back(list_id.id);
} else {
actor_idx = actor_opt.value();
}
}
// check actor op order
if (!_last_seen_seq.count(actor_idx)) {
// we dont know this actor yet, first seq needs to be 0
if (list_id.seq != 0) {
return false;
}
} else {
// making sure we dont skip operations by that actor
if (list_id.seq != _last_seen_seq.at(actor_idx) + 1) {
return false;
}
}
size_t insert_idx = 0;
if (_list.empty()) {
if (parent_left.has_value() || parent_right.has_value()) {
// empty, missing parents
return false;
}
} else {
// find left
std::optional<size_t> left_idx_opt = std::nullopt;
if (parent_left.has_value()) {
left_idx_opt = findIdx(parent_left.value(), _last_inserted_idx[actor_idx]);
if (!left_idx_opt.has_value()) {
// missing parent left
return false;
}
// we insert before the it, so we need to go past the left parent
insert_idx = left_idx_opt.value() + 1;
} // else insert_idx = 0
const size_t left_idx_hint = insert_idx;
// find right
size_t right_idx = _list.size();
if (parent_right.has_value()) {
auto tmp_right = findIdx(parent_right.value(), left_idx_hint);
if (!tmp_right.has_value()) {
return false;
}
right_idx = tmp_right.value();
}
bool scanning {false};
for(size_t i = insert_idx;; i++) {
if (!scanning) {
insert_idx = i;
}
// if right parent / end of doc, insert
if (insert_idx == right_idx) {
break;
}
// we ran past right o.o ?
if (insert_idx == _list.size()) {
break;
}
const Entry& at_i = _list[i];
// parents left and right
std::optional<size_t> i_left_idx {std::nullopt};
if (at_i.parent_left.has_value()) {
i_left_idx = findIdx(at_i.parent_left.value(), left_idx_hint);
if (!i_left_idx.has_value()) {
assert(false && "item in list with unknown parent left!!");
return false;
}
}
// possibility map
//
// | ir < r | ir == r | ir > r
// -------------------------------------
// il < l | insert | insert | insert
// il == l | ? | agentfallback | ?
// il > l | skip | skip | skip
if (i_left_idx < left_idx_opt) {
break;
} else if (i_left_idx == left_idx_opt) {
// get i parent_right
size_t i_right_idx = _list.size();
if (at_i.parent_right.has_value()) {
auto tmp_right = findIdx(at_i.parent_right.value(), insert_idx);
if (!tmp_right.has_value()) {
assert(false && "item in list with unknown parent right!!");
return false;
}
i_right_idx = tmp_right.value();
}
if (i_right_idx < right_idx) {
scanning = true;
} else if (i_right_idx == right_idx) {
// actor id tie breaker
if (_actors[actor_idx] < _actors[at_i.id.actor_idx]) {
break;
} else {
scanning = false;
}
} else { // i_right_idx > right_idx
scanning = false;
}
} else { // il > l
// do nothing
}
}
}
{ // actual insert
Entry new_entry;
new_entry.id.actor_idx = actor_idx;
new_entry.id.seq = list_id.seq;
if (parent_left.has_value()) {
new_entry.parent_left = ListIDInternal{findActor(parent_left.value().id).value(), parent_left.value().seq};
}
if (parent_right.has_value()) {
new_entry.parent_right = ListIDInternal{findActor(parent_right.value().id).value(), parent_right.value().seq};
}
new_entry.value = value;
_list.emplace(_list.begin() + insert_idx, new_entry);
_last_inserted_idx[actor_idx] = insert_idx;
}
_doc_size++;
_last_seen_seq[actor_idx] = list_id.seq;
extra_assert(verify());
return true;
}
// returns false if not found
bool del(const ListID& id) {
extra_assert(verify());
auto actor_idx_opt = findActor(id.id);
if (!actor_idx_opt.has_value()) {
// we dont have anything with that actor
return false;
}
const ListIDInternal tmp_id {actor_idx_opt.value(), id.seq};
for (auto& it : _list) {
if (it.id == tmp_id) {
if (it.value.has_value()) {
it.value.reset();
_doc_size--;
extra_assert(verify());
return true;
} else {
extra_assert(verify());
return false; // TODO: allow double deletes?,,,, need ids
}
}
}
extra_assert(verify());
return false;
}
// Current value of the _doc_size counter (the count verify() checks against the
// number of entries that still hold a value).
size_t getDocSize() const {
	return _doc_size;
}
std::vector<ValueType> getArray(void) const {
std::vector<ValueType> array;
for (const auto& e : _list) {
if (e.value.has_value()) {
array.push_back(e.value.value());
}
}
return array;
}
// TODO: only in debug?
// Consistency check: recounts the entries that hold a value and reports whether
// that count matches _doc_size.
bool verify() const {
	size_t live_count {0};

	for (const auto& entry : _list) {
		live_count += entry.value.has_value() ? 1u : 0u;
	}

	return live_count == _doc_size;
}
};
} // GreenCRDT::V1

214
version4/test1.cpp Normal file
View File

@ -0,0 +1,214 @@
#define EXTRA_ASSERTS 1
#include <green_crdt/v4/list.hpp>
#include <numeric>
#include <random>
#include <iostream>
#include <cassert>
#include <string_view>
#include <vector>
// single letter actor, for testing only
using Actor = char;
using ListType = GreenCRDT::V4::List<char, Actor>;
// Test helper: compares a std::vector<char> against text. The right-hand side
// may be anything implicitly convertible to std::string_view (string literal,
// std::string, std::string_view).
//
// Deliberately declared in the global namespace: adding new overloads to
// namespace std is undefined behavior. Every caller in this file lives in the
// global namespace, so ordinary unqualified lookup still finds this operator.
//
// Returns true when both sides have the same length and the same characters.
bool operator==(const std::vector<char>& lhs, const std::string_view& rhs) {
	// view the vector's storage as text and reuse string_view's comparison
	return std::string_view{lhs.data(), lhs.size()} == rhs;
}
void testSingle1(void) {
ListType list;
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.add({'A', 1}, 'b', ListType::ListID{'A', 0u}, std::nullopt));
assert(list.getArray() == "ab");
}
void testConcurrent1(void) {
// agent_a < agent_b
// concurrent insert of first element
{ // variant 1, a then b
ListType list;
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
assert(list.getArray() == "ab");
}
{ // variant 2, b then a
ListType list;
assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.getArray() == "ab");
}
}
// One recorded add operation for the tests: holds the four arguments that are
// later forwarded to ListType::add (see randomAddPermutations / testMain1).
// Field order matters, the tests brace-initialize this struct positionally.
struct AddOp {
// id passed as the first argument of add
ListType::ListID id;
// character passed as the value to insert
char value;
// optional left parent id; std::nullopt when the op names none
std::optional<ListType::ListID> parent_left;
// optional right parent id; std::nullopt when the op names none
std::optional<ListType::ListID> parent_right;
};
void randomAddPermutations(const std::vector<AddOp>& ops, const std::string& expected) {
// TODO: more then 1k?
for (size_t i = 0; i < 1000; i++) {
std::minstd_rand rng(1337 + i);
std::vector<size_t> ops_todo(ops.size());
std::iota(ops_todo.begin(), ops_todo.end(), 0u);
size_t attempts {0};
ListType list;
do {
size_t idx = rng() % ops_todo.size();
if (list.add(ops[ops_todo[idx]].id, ops[ops_todo[idx]].value, ops[ops_todo[idx]].parent_left, ops[ops_todo[idx]].parent_right)) {
// only remove if it was possible -> returned true;
ops_todo.erase(ops_todo.begin()+idx);
}
attempts++;
assert(attempts < 10'000); // in case we run into an endless loop
} while (!ops_todo.empty());
assert(list.getArray() == expected);
}
}
void testInterleave1(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'A', 1u}, 'a', ListType::ListID{'A', 0u}, std::nullopt},
{{'A', 2u}, 'a', ListType::ListID{'A', 1u}, std::nullopt},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'B', 1u}, 'b', ListType::ListID{'B', 0u}, std::nullopt},
{{'B', 2u}, 'b', ListType::ListID{'B', 1u}, std::nullopt},
};
randomAddPermutations(ops, "aaabbb");
}
void testInterleave2(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'A', 1u}, 'a', std::nullopt, ListType::ListID{'A', 0u}},
{{'A', 2u}, 'a', std::nullopt, ListType::ListID{'A', 1u}},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'B', 1u}, 'b', std::nullopt, ListType::ListID{'B', 0u}},
{{'B', 2u}, 'b', std::nullopt, ListType::ListID{'B', 1u}},
};
randomAddPermutations(ops, "aaabbb");
}
void testConcurrent2(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'C', 0u}, 'c', std::nullopt, std::nullopt},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'D', 0u}, 'd', ListType::ListID{'A', 0u}, ListType::ListID{'C', 0u}},
};
randomAddPermutations(ops, "adbc");
}
void testMain1(void) {
ListType list;
static_assert('0' < '1');
const std::vector<AddOp> a0_ops {
{{'0', 0u}, 'a', std::nullopt, std::nullopt},
{{'0', 1u}, 'b', ListType::ListID{'0', 0u}, std::nullopt},
{{'0', 2u}, 'c', ListType::ListID{'0', 1u}, std::nullopt},
{{'0', 3u}, 'd', ListType::ListID{'0', 1u}, ListType::ListID{'0', 2u}},
};
const std::vector<AddOp> a1_ops {
// knows of a0 up to {a0, 1}
{{'1', 0u}, 'z', ListType::ListID{'0', 0u}, ListType::ListID{'0', 1u}},
{{'1', 1u}, 'y', ListType::ListID{'0', 1u}, std::nullopt},
};
{ // the ez, in order stuff
// a0 insert first char, 'a', since its the first, we dont have any parents
assert(list.add(a0_ops[0].id, a0_ops[0].value, a0_ops[0].parent_left, a0_ops[0].parent_right));
assert(list.getArray() == "a");
// a0 insert secound char, 'b' after 'a', no parents to right
assert(list.add(a0_ops[1].id, a0_ops[1].value, a0_ops[1].parent_left, a0_ops[1].parent_right));
assert(list.getArray() == "ab");
// a0 insert 'c' after 'b', no parents to right
assert(list.add(a0_ops[2].id, a0_ops[2].value, a0_ops[2].parent_left, a0_ops[2].parent_right));
assert(list.getArray() == "abc");
// a0 insert 'd' after 'b', 'c' parent right
assert(list.add(a0_ops[3].id, a0_ops[3].value, a0_ops[3].parent_left, a0_ops[3].parent_right));
assert(list.getArray() == "abdc");
// a1 insert 'z' after 'a', 'b' parent right
assert(list.add(a1_ops[0].id, a1_ops[0].value, a1_ops[0].parent_left, a1_ops[0].parent_right));
assert(list.getArray() == "azbdc");
}
std::cout << "done with ez\n";
{ // a1 was not uptodate only had 0,1 of a0
// a1 insert 'y' after 'b', no parent right
assert(list.add(a1_ops[1].id, a1_ops[1].value, a1_ops[1].parent_left, a1_ops[1].parent_right));
assert(list.getArray() == "azbdcy");
}
std::cout << "\ndoc size (with tombstones): " << list._list.size() << "\n";
std::cout << "\ndoc size: " << list.getDocSize() << "\n";
std::cout << "doc text:\n";
const auto tmp_array = list.getArray();
std::cout << std::string_view(tmp_array.data(), tmp_array.size()) << "\n";
}
// Runs every test in a fixed order. Before each test its name is printed,
// after each test a 40-character separator line.
int main() {
	struct NamedTest {
		const char* label;
		void (*run)(void);
	};

	const NamedTest all_tests[] {
		{"testSingle1", &testSingle1},
		{"testConcurrent1", &testConcurrent1},
		{"testInterleave1", &testInterleave1},
		{"testInterleave2", &testInterleave2},
		{"testConcurrent2", &testConcurrent2},
		{"testMain1", &testMain1},
	};

	for (const auto& test : all_tests) {
		std::cout << test.label << ":\n";
		test.run();
		std::cout << std::string(40, '-') << "\n";
	}

	return 0;
}