Basic, lightly tested v1; the only change from v0 to v1 is the actor cache (much lighter for large actor IDs).

This commit is contained in:
Green Sky 2022-12-22 19:44:34 +01:00
parent a597193529
commit be5485856f
No known key found for this signature in database
4 changed files with 569 additions and 0 deletions

View File

@ -38,6 +38,7 @@ endif()
add_subdirectory(./prototyping EXCLUDE_FROM_ALL) add_subdirectory(./prototyping EXCLUDE_FROM_ALL)
add_subdirectory(./version0) add_subdirectory(./version0)
add_subdirectory(./version1)
add_subdirectory(./bench) add_subdirectory(./bench)

26
version1/CMakeLists.txt Normal file
View File

@ -0,0 +1,26 @@
# Build script for version 1 of the CRDT list: an INTERFACE (header-only)
# library target plus its test executable.
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
project(crdt_version1 CXX C)
# INTERFACE target: nothing is compiled here, consumers only inherit the
# usage requirements declared below.
add_library(crdt_version1 INTERFACE)
# consumers need to be compiled as C++17 or newer
target_compile_features(crdt_version1 INTERFACE cxx_std_17)
# headers are looked up relative to this project's source directory
target_include_directories(crdt_version1 INTERFACE "${PROJECT_SOURCE_DIR}")
########################################
# test executable built from test1.cpp
add_executable(v1_test1
./test1.cpp
)
target_link_libraries(v1_test1 PUBLIC crdt_version1)
########################################
# second test executable, currently disabled
#add_executable(v1_test2
#./test2.cpp
#)
#target_link_libraries(v1_test2 PUBLIC crdt_version1)

View File

@ -0,0 +1,328 @@
#pragma once
#include <cstdint>
#include <optional>
#include <vector>
#include <map>
#include <string>
#include <cassert>
// extra_assert(): consistency checks that are only compiled in when
// EXTRA_ASSERTS is defined to 1 before this point; otherwise it expands to a no-op.
#if !defined(extra_assert)
	#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
		#define extra_assert(...) assert(__VA_ARGS__)
	#else
		#define extra_assert(...) void(0)
	#endif
#endif

namespace GreenCRDT::V1 {

// Sequence CRDT storing values of ValueType, written to by actors identified by ActorType.
// Entries are never removed from `list`; deleting only clears the value (tombstone).
// ActorType needs operator== and operator< (used for lookup and as the insert tie breaker).
template<typename ValueType, typename ActorType>
struct List {
	// for public interface
	struct ListID {
		ActorType id;
		uint64_t seq{0}; // strictly increasing for that actor

		// orders by seq first, the actor id only breaks ties
		bool operator<(const ListID& rhs) const {
			if (seq != rhs.seq) {
				return seq < rhs.seq;
			}
			return id < rhs.id;
		}

		bool operator==(const ListID& rhs) const {
			return seq == rhs.seq && id == rhs.id;
		}

		bool operator!=(const ListID& rhs) const {
			return seq != rhs.seq || id != rhs.id;
		}
	};

	// same as ListID, but the actor is referred to by its index into _actors
	struct ListIDInternal {
		size_t actor_idx{0};
		uint64_t seq{0}; // strictly increasing for that actor

		bool operator==(const ListIDInternal& rhs) const {
			return seq == rhs.seq && actor_idx == rhs.actor_idx;
		}
	};

	// internally the index into this array is used to refer to an actor
	std::vector<ActorType> _actors;

	// TODO: replace with SoA
	struct Entry {
		ListIDInternal id;

		// Yjs
		std::optional<ListIDInternal> parent_left;
		std::optional<ListIDInternal> parent_right;

		// might be deleted (yes, *sigh*, crtds need tombstones)
		std::optional<ValueType> value;
	};

	// TODO: use something better, edit: this seems fine
	// all entries in document order, including tombstones
	std::vector<Entry> list;

	// number of not deleted entries
	size_t doc_size {0};

	// actor index -> seq of the last op accepted from that actor
	std::map<size_t, uint64_t> last_seen_seq;

	// Returns the index of `actor` in _actors, or nullopt if the actor is unknown.
	std::optional<size_t> findActor(const ActorType& actor) const {
		for (size_t i = 0; i < _actors.size(); i++) {
			if (_actors[i] == actor) {
				return i;
			}
		}

		return std::nullopt;
	}

	// Returns the position in `list` of the entry with the given internal id,
	// or nullopt if there is no such entry. Linear scan.
	std::optional<size_t> findIdx(const ListIDInternal& list_id) const {
		extra_assert(verify());

		for (size_t i = 0; i < list.size(); i++) {
			if (list[i].id == list_id) {
				return i;
			}
		}

		return std::nullopt;
	}

	// Same lookup for a public id; an unknown actor means the entry can not exist.
	std::optional<size_t> findIdx(const ListID& list_id) const {
		extra_assert(verify());

		const auto actor_idx_opt = findActor(list_id.id);
		if (!actor_idx_opt.has_value()) {
			return std::nullopt;
		}

		return findIdx(ListIDInternal{actor_idx_opt.value(), list_id.seq});
	}

	// Integrates an insert operation.
	// list_id: id of the new entry, seq has to be 0 for the first op of an actor
	//          and last seen seq + 1 afterwards
	// value: the value to insert
	// parent_left / parent_right: ids of the neighbours at the time of the insert,
	//          nullopt stands for start / end of the document
	// returns false if missing OPs (seq out of order or a parent is not known yet),
	// the document is unchanged in that case, except that the actor gets registered
	//
	// based on YjsMod https://github.com/josephg/reference-crdts/blob/9f4f9c3a97b497e2df8ae4473d1e521d3c3bf2d2/crdts.ts#L293-L348
	// which is a modified Yjs(YATA) algo
	bool add(const ListID& list_id, const ValueType& value, const std::optional<ListID>& parent_left, const std::optional<ListID>& parent_right) {
		extra_assert(verify());

		size_t actor_idx {0};
		{ // new actor?
			// add, even if op fails
			const auto actor_opt = findActor(list_id.id);
			if (actor_opt.has_value()) {
				actor_idx = actor_opt.value();
			} else {
				actor_idx = _actors.size();
				_actors.push_back(list_id.id);
			}
		}

		{ // check actor op order
			const auto seen_it = last_seen_seq.find(actor_idx);
			if (seen_it == last_seen_seq.end()) {
				// we dont know this actor yet, first seq needs to be 0
				if (list_id.seq != 0) {
					return false;
				}
			} else if (list_id.seq != seen_it->second + 1) {
				// making sure we dont skip operations by that actor
				return false;
			}
		}

		size_t insert_idx = 0;
		if (list.empty()) {
			if (parent_left.has_value() || parent_right.has_value()) {
				// empty, missing parents
				return false;
			}
		} else {
			// find left
			std::optional<size_t> left_idx = std::nullopt;
			if (parent_left.has_value()) {
				left_idx = findIdx(parent_left.value());
				if (!left_idx.has_value()) {
					// missing parent left
					return false;
				}

				// we insert before the it, so we need to go past the left parent
				insert_idx = left_idx.value() + 1;
			} // else insert_idx = 0

			// find right
			size_t right_idx = list.size();
			if (parent_right.has_value()) {
				const auto tmp_right = findIdx(parent_right.value());
				if (!tmp_right.has_value()) {
					// missing parent right
					return false;
				}
				right_idx = tmp_right.value();
			}

			// while scanning, insert_idx stays at the last position we know is
			// valid, and i keeps looking ahead for a reason to move further right
			bool scanning {false};

			for (size_t i = insert_idx;; i++) {
				if (!scanning) {
					insert_idx = i;
				}

				// The stop conditions have to look at i, not insert_idx:
				// insert_idx is frozen while scanning, so testing it would let i
				// run past the right parent and past the end of the list.

				// if right parent / end of doc, insert
				if (i == right_idx) {
					break;
				}
				// we ran past right o.o ?
				if (i == list.size()) {
					break;
				}

				const Entry& at_i = list[i];

				// parents left and right
				// nullopt compares less than any index, so "no left parent"
				// sorts before position 0
				std::optional<size_t> i_left_idx {std::nullopt};
				if (at_i.parent_left.has_value()) {
					i_left_idx = findIdx(at_i.parent_left.value());
					if (!i_left_idx.has_value()) {
						assert(false && "item in list with unknown parent left!!");
						return false;
					}
				}

				// possibility map
				//
				//         | ir < r | ir == r       | ir > r
				// -------------------------------------
				// il < l  | insert | insert        | insert
				// il == l | ?      | agentfallback | ?
				// il > l  | skip   | skip          | skip

				if (i_left_idx < left_idx) {
					break;
				} else if (i_left_idx == left_idx) {
					// get i parent_right
					size_t i_right_idx = list.size();
					if (at_i.parent_right.has_value()) {
						const auto tmp_right = findIdx(at_i.parent_right.value());
						if (!tmp_right.has_value()) {
							assert(false && "item in list with unknown parent right!!");
							return false;
						}
						i_right_idx = tmp_right.value();
					}

					if (i_right_idx < right_idx) {
						scanning = true;
					} else if (i_right_idx == right_idx) {
						// actor id tie breaker
						if (_actors[actor_idx] < _actors[at_i.id.actor_idx]) {
							break;
						} else {
							scanning = false;
						}
					} else { // i_right_idx > right_idx
						scanning = false;
					}
				} else { // il > l
					// do nothing
				}
			}
		}

		{ // actual insert
			Entry new_entry;

			new_entry.id = ListIDInternal{actor_idx, list_id.seq};

			// parents that are set were found above, so their actors are known
			if (parent_left.has_value()) {
				new_entry.parent_left = ListIDInternal{findActor(parent_left.value().id).value(), parent_left.value().seq};
			}

			if (parent_right.has_value()) {
				new_entry.parent_right = ListIDInternal{findActor(parent_right.value().id).value(), parent_right.value().seq};
			}

			new_entry.value = value;

			list.insert(list.begin() + insert_idx, new_entry);
		}

		doc_size++;
		last_seen_seq[actor_idx] = list_id.seq;

		extra_assert(verify());
		return true;
	}

	// Turns the entry with the given id into a tombstone.
	// returns false if not found or if it was deleted before
	bool del(const ListID& id) {
		extra_assert(verify());

		const auto actor_idx_opt = findActor(id.id);
		if (!actor_idx_opt.has_value()) {
			// we dont have anything with that actor
			return false;
		}

		const ListIDInternal target_id {actor_idx_opt.value(), id.seq};

		for (auto& entry : list) {
			if (!(entry.id == target_id)) {
				continue;
			}

			if (!entry.value.has_value()) {
				extra_assert(verify());
				return false; // TODO: allow double deletes?,,,, need ids
			}

			entry.value.reset();
			doc_size--;

			extra_assert(verify());
			return true;
		}

		extra_assert(verify());
		return false;
	}

	// Returns the values of all not deleted entries, in document order.
	std::vector<ValueType> getArray(void) const {
		std::vector<ValueType> array;
		array.reserve(doc_size);

		for (const auto& e : list) {
			if (e.value.has_value()) {
				array.push_back(e.value.value());
			}
		}

		return array;
	}

	// Checks that doc_size matches the number of entries that still have a value.
	// TODO: only in debug?
	bool verify(void) const {
		size_t actual_size = 0;
		for (const auto& it : list) {
			if (it.value.has_value()) {
				actual_size++;
			}
		}
		//assert(doc_size == actual_size);
		return doc_size == actual_size;
	}
};

} // GreenCRDT::V1

214
version1/test1.cpp Normal file
View File

@ -0,0 +1,214 @@
#define EXTRA_ASSERTS 1
#include <green_crdt/v1/list.hpp>
#include <numeric>
#include <random>
#include <iostream>
#include <cassert>
#include <string_view>
#include <vector>
// single letter actor, for testing only
using Actor = char;
// the list under test: char values, written by char actors
using ListType = GreenCRDT::V1::List<char, Actor>;
// Test helper: compares the content of a char vector with a string.
// Lives in the global namespace, since adding declarations to namespace std
// is undefined behaviour; all users in this file are global functions, so
// regular unqualified lookup finds it.
bool operator==(const std::vector<char>& lhs, const std::string_view& rhs) {
	// view the vector's storage as a string, so the standard comparison applies
	return std::string_view(lhs.data(), lhs.size()) == rhs;
}
void testSingle1(void) {
ListType list;
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.add({'A', 1}, 'b', ListType::ListID{'A', 0u}, std::nullopt));
assert(list.getArray() == "ab");
}
void testConcurrent1(void) {
// agent_a < agent_b
// concurrent insert of first element
{ // variant 1, a then b
ListType list;
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
assert(list.getArray() == "ab");
}
{ // variant 2, b then a
ListType list;
assert(list.add({'B', 0}, 'b', std::nullopt, std::nullopt));
assert(list.add({'A', 0}, 'a', std::nullopt, std::nullopt));
assert(list.getArray() == "ab");
}
}
// A recorded insert operation; the fields are the arguments for ListType::add(),
// in the same order.
struct AddOp {
// id of the entry to insert
ListType::ListID id;
// the inserted value
char value;
// left / right parent ids, nullopt if there is none
std::optional<ListType::ListID> parent_left;
std::optional<ListType::ListID> parent_right;
};
void randomAddPermutations(const std::vector<AddOp>& ops, const std::string& expected) {
// TODO: more then 1k?
for (size_t i = 0; i < 1000; i++) {
std::minstd_rand rng(1337 + i);
std::vector<size_t> ops_todo(ops.size());
std::iota(ops_todo.begin(), ops_todo.end(), 0u);
size_t attempts {0};
ListType list;
do {
size_t idx = rng() % ops_todo.size();
if (list.add(ops[ops_todo[idx]].id, ops[ops_todo[idx]].value, ops[ops_todo[idx]].parent_left, ops[ops_todo[idx]].parent_right)) {
// only remove if it was possible -> returned true;
ops_todo.erase(ops_todo.begin()+idx);
}
attempts++;
assert(attempts < 10'000); // in case we run into an endless loop
} while (!ops_todo.empty());
assert(list.getArray() == expected);
}
}
void testInterleave1(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'A', 1u}, 'a', ListType::ListID{'A', 0u}, std::nullopt},
{{'A', 2u}, 'a', ListType::ListID{'A', 1u}, std::nullopt},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'B', 1u}, 'b', ListType::ListID{'B', 0u}, std::nullopt},
{{'B', 2u}, 'b', ListType::ListID{'B', 1u}, std::nullopt},
};
randomAddPermutations(ops, "aaabbb");
}
void testInterleave2(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'A', 1u}, 'a', std::nullopt, ListType::ListID{'A', 0u}},
{{'A', 2u}, 'a', std::nullopt, ListType::ListID{'A', 1u}},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'B', 1u}, 'b', std::nullopt, ListType::ListID{'B', 0u}},
{{'B', 2u}, 'b', std::nullopt, ListType::ListID{'B', 1u}},
};
randomAddPermutations(ops, "aaabbb");
}
void testConcurrent2(void) {
const std::vector<AddOp> ops {
{{'A', 0u}, 'a', std::nullopt, std::nullopt},
{{'C', 0u}, 'c', std::nullopt, std::nullopt},
{{'B', 0u}, 'b', std::nullopt, std::nullopt},
{{'D', 0u}, 'd', ListType::ListID{'A', 0u}, ListType::ListID{'C', 0u}},
};
randomAddPermutations(ops, "adbc");
}
void testMain1(void) {
ListType list;
static_assert('0' < '1');
const std::vector<AddOp> a0_ops {
{{'0', 0u}, 'a', std::nullopt, std::nullopt},
{{'0', 1u}, 'b', ListType::ListID{'0', 0u}, std::nullopt},
{{'0', 2u}, 'c', ListType::ListID{'0', 1u}, std::nullopt},
{{'0', 3u}, 'd', ListType::ListID{'0', 1u}, ListType::ListID{'0', 2u}},
};
const std::vector<AddOp> a1_ops {
// knows of a0 up to {a0, 1}
{{'1', 0u}, 'z', ListType::ListID{'0', 0u}, ListType::ListID{'0', 1u}},
{{'1', 1u}, 'y', ListType::ListID{'0', 1u}, std::nullopt},
};
{ // the ez, in order stuff
// a0 insert first char, 'a', since its the first, we dont have any parents
assert(list.add(a0_ops[0].id, a0_ops[0].value, a0_ops[0].parent_left, a0_ops[0].parent_right));
assert(list.getArray() == "a");
// a0 insert secound char, 'b' after 'a', no parents to right
assert(list.add(a0_ops[1].id, a0_ops[1].value, a0_ops[1].parent_left, a0_ops[1].parent_right));
assert(list.getArray() == "ab");
// a0 insert 'c' after 'b', no parents to right
assert(list.add(a0_ops[2].id, a0_ops[2].value, a0_ops[2].parent_left, a0_ops[2].parent_right));
assert(list.getArray() == "abc");
// a0 insert 'd' after 'b', 'c' parent right
assert(list.add(a0_ops[3].id, a0_ops[3].value, a0_ops[3].parent_left, a0_ops[3].parent_right));
assert(list.getArray() == "abdc");
// a1 insert 'z' after 'a', 'b' parent right
assert(list.add(a1_ops[0].id, a1_ops[0].value, a1_ops[0].parent_left, a1_ops[0].parent_right));
assert(list.getArray() == "azbdc");
}
std::cout << "done with ez\n";
{ // a1 was not uptodate only had 0,1 of a0
// a1 insert 'y' after 'b', no parent right
assert(list.add(a1_ops[1].id, a1_ops[1].value, a1_ops[1].parent_left, a1_ops[1].parent_right));
assert(list.getArray() == "azbdcy");
}
std::cout << "\ndoc size (with tombstones): " << list.list.size() << "\n";
std::cout << "\ndoc size: " << list.doc_size << "\n";
std::cout << "doc text:\n";
const auto tmp_array = list.getArray();
std::cout << std::string_view(tmp_array.data(), tmp_array.size()) << "\n";
}
// Runs all tests in a fixed order. Prints the name of each test before it
// runs and a separator line after it.
int main(void) {
	struct NamedTest {
		const char* name;
		void (*run)(void);
	};

	const NamedTest all_tests[] {
		{"testSingle1", testSingle1},
		{"testConcurrent1", testConcurrent1},
		{"testInterleave1", testInterleave1},
		{"testInterleave2", testInterleave2},
		{"testConcurrent2", testConcurrent2},
		{"testMain1", testMain1},
	};

	const std::string separator(40, '-');

	for (const auto& test : all_tests) {
		std::cout << test.name << ":\n";
		test.run();
		std::cout << separator << "\n";
	}

	return 0;
}