Compare commits

...

3 Commits

Author SHA1 Message Date
a597193529
add jpaper bench for v0 2022-12-22 18:18:56 +01:00
175042eb7c
rename stuff 2022-12-22 15:20:32 +01:00
e486d79dc3
make vim faster :D 2022-12-22 15:06:11 +01:00
11 changed files with 298 additions and 22 deletions

View File

@ -39,5 +39,7 @@ add_subdirectory(./prototyping EXCLUDE_FROM_ALL)
add_subdirectory(./version0) add_subdirectory(./version0)
add_subdirectory(./bench)
add_subdirectory(./vim_research) add_subdirectory(./vim_research)

28
bench/CMakeLists.txt Normal file
View File

@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
project(crdt_bench CXX C)
########################################
# Benchmark executable built from v0_jpaper.cpp.
# Links against the crdt_version0 target and nlohmann_json.
add_executable(crdt_bench_jpaper_v0
./v0_jpaper.cpp
)
target_link_libraries(crdt_bench_jpaper_v0 PUBLIC
crdt_version0
nlohmann_json::nlohmann_json
)
########################################
# Benchmark target for version1, currently disabled by the if (FALSE) guard.
# NOTE(review): the source is listed as ./test1.cpp, which looks like a placeholder - confirm before enabling.
if (FALSE)
add_executable(crdt_bench_jpaper_v1
./test1.cpp
)
target_link_libraries(crdt_bench_jpaper_v1 PUBLIC
crdt_version1
nlohmann_json::nlohmann_json
)
endif()

38
bench/README.md Normal file
View File

@ -0,0 +1,38 @@
# Timings
all benches use the uncompressed .json from disk
all done with the "cool and breezy copy" preset
note: the .json is not a perfect fit, because it is not designed for the Yjs algorithm (it is missing parent_right, and the ids are not perfect)
the json contains:
doc size (with tombstones): 182315
doc size: 104852
total inserts: 182315
total deletes: 77463
total ops: 259778
## baseline ( just walking through the json, no insertions )
- g++9 -g :
- 23.0s
- 22.6s
- 23.0s
- g++9 -O3 -DNDEBUG :
- 9.6s
- 9.7s
- 9.7s
## version0
- g++9 -g -O2 :
- 10m35s
- g++9 -O3 -DNDEBUG :
- 8m7s
## version1

198
bench/v0_jpaper.cpp Normal file
View File

@ -0,0 +1,198 @@
#define EXTRA_ASSERTS 0
//#include <green_crdt/v0/text_document.hpp>
#include <green_crdt/v0/list.hpp>
#include <nlohmann/json.hpp>
#include <unordered_map>
#include <string_view>
#include <fstream>
#include <iostream>
#include <cassert>
using ActorID = std::array<uint8_t, 32>;
//using Doc = GreenCRDT::V0::TextDocument<ActorID>;
using List = GreenCRDT::V0::List<char, ActorID>;
template<>
struct std::hash<ActorID> {
std::size_t operator()(ActorID const& s) const noexcept {
static_assert(sizeof(size_t) == 8);
// TODO: maybe shuffle the indices a bit
return
(static_cast<size_t>(s[0]) << 8*0) |
(static_cast<size_t>(s[1]) << 8*1) |
(static_cast<size_t>(s[2]) << 8*2) |
(static_cast<size_t>(s[3]) << 8*3) |
(static_cast<size_t>(s[4]) << 8*4) |
(static_cast<size_t>(s[5]) << 8*5) |
(static_cast<size_t>(s[6]) << 8*6) |
(static_cast<size_t>(s[7]) << 8*7)
;
}
};
// for dev, benching in debug is useful, but only if the amount of asserts is reasonable
// extra_assert(...) forwards to assert() only when EXTRA_ASSERTS is defined to 1,
// otherwise it expands to a no-op expression.
// If extra_assert was already defined before this point, that definition is kept.
#if !defined(extra_assert)
	#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
		#define extra_assert(...) assert(__VA_ARGS__)
	#else
		#define extra_assert(...) void(0)
	#endif
#endif

namespace detail {

// Converts a single hexadecimal digit to its 4 bit value.
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'.
// Any other character trips extra_assert (when enabled) and yields 0.
uint8_t nib_from_hex(char c) {
	extra_assert(
		(c >= '0' && c <= '9') ||
		(c >= 'a' && c <= 'f') ||
		(c >= 'A' && c <= 'F')
	);

	if (c >= '0' && c <= '9') {
		return static_cast<uint8_t>(c - '0');
	} else if (c >= 'a' && c <= 'f') {
		return static_cast<uint8_t>(c - 'a' + 10);
	} else if (c >= 'A' && c <= 'F') {
		// uppercase digits used to fall through to the "invalid" case and decode as 0
		return static_cast<uint8_t>(c - 'A' + 10);
	}

	return 0u;
}

} // detail
// Decodes a hex string into an ActorID, two hex digits per byte, high nibble first.
// str is expected to hold exactly 32*2 hex digits (checked by extra_assert when enabled).
// If str is shorter, only the complete digit pairs are decoded and the remaining bytes stay 0;
// characters beyond 32*2 are ignored.
static ActorID ActorIDFromStr(std::string_view str) {
	extra_assert(str.size() == 32*2);

	// zero-filled, so a short input never leaves bytes uninitialized
	ActorID tmp {};

	// never read past the end of str, even when the assert above is compiled out
	const size_t available_bytes = str.size() / 2;
	const size_t byte_count = available_bytes < tmp.size() ? available_bytes : tmp.size();

	for (size_t i = 0; i < byte_count; i++) {
		tmp[i] = static_cast<uint8_t>(
			(detail::nib_from_hex(str[i*2]) << 4) | detail::nib_from_hex(str[i*2+1])
		);
	}

	return tmp;
}
// An id in the "seq@ID" text form used by the json, split into its two parts.
struct JObj {
	ActorID id; // the part after the '@'
	uint64_t seq = 0; // the decimal number before the '@'
};
// Parses the json "seq@ID" form into a JObj.
// seq is a non-empty run of decimal digits, ID is 32*2 hex digits.
// Malformed input is only caught by the asserts, so it must not reach this function in NDEBUG builds.
static JObj JObjFromStr(std::string_view str) {
	extra_assert(str.size() > 32*2 + 1);

	const size_t at_pos = str.find('@');
	// without this check a missing '@' would make at_pos+1 wrap around to 0,
	// and both halves below would silently alias the whole string
	assert(at_pos != std::string_view::npos);

	const auto seq_sv = str.substr(0, at_pos);
	const auto id_sv = str.substr(at_pos+1);
	assert(seq_sv.size() != 0);
	assert(id_sv.size() == 32*2);

	// plain base 10 parse of the seq part
	uint64_t tmp_seq {0};
	for (const char digit : seq_sv) {
		assert(digit >= '0' && digit <= '9');
		tmp_seq = tmp_seq * 10 + static_cast<uint64_t>(digit - '0');
	}

	return {ActorIDFromStr(id_sv), tmp_seq};
}
// Replays the edit trace from ../res/paper.json (one json change per line) into a V0 list
// and prints the resulting sizes and op counters.
// Returns 0 on success and 1 if the trace file could not be opened.
int main(void) {
	List list;

	std::ifstream file {"../res/paper.json"};
	if (!file.is_open()) {
		// without this check a missing file looks like an instant (and meaningless) bench run
		std::cerr << "failed to open ../res/paper.json\n";
		return 1;
	}

	std::cout << "start reading...\n";

	uint64_t g_total_inserts {0};
	uint64_t g_total_deletes {0};

	// the op seqs are not sequentially growing for inserts, so we sidestep
	// and count the inserts per actor ourselves
	std::unordered_map<ActorID, uint64_t> g_seq_inserts;
	// maps actor -> json op_seq -> list id seq
	std::unordered_map<ActorID, std::unordered_map<uint64_t, uint64_t>> map_seq;

	for (std::string line; std::getline(file, line); ) {
		nlohmann::json j_entry = nlohmann::json::parse(line);

		const ActorID actor = ActorIDFromStr(static_cast<const std::string&>(j_entry["actor"]));
		// op_seq starts at "startOp" and is advanced once per op below
		uint64_t op_seq = j_entry["startOp"];

		for (const auto& j_op : j_entry["ops"]) {
			const auto& j_action = j_op["action"];

			if (j_action == "set") {
				const auto obj = JObjFromStr(static_cast<const std::string&>(j_op["obj"]));

				// skip all non text edits (create text doc, cursor etc)
				// they still have to consume their op_seq at the end of the loop body,
				// otherwise every later op of the same change gets recorded under the wrong seq
				if (obj.seq == 1) {
					if (j_op["insert"]) {
						const auto& j_parent = j_op["key"];
						extra_assert(!j_parent.is_null());

						// "_head" means insert at the very front, so both parents stay empty
						std::optional<List::ListID> parent_left_id;
						std::optional<List::ListID> parent_right_id;

						const bool has_parent = !(j_parent == "_head");
						if (has_parent) {
							extra_assert(static_cast<const std::string&>(j_op["value"]).size() == 1);

							// split parent into seq and actor
							const auto parent_left = JObjFromStr(static_cast<const std::string&>(j_parent));

							const auto idx_opt = list.findIdx({parent_left.id, map_seq[parent_left.id][parent_left.seq]});
							assert(idx_opt.has_value());

							parent_left_id = list.list.at(idx_opt.value()).id;

							// the json has no parent_right, so use whatever currently follows parent_left
							if (idx_opt.value()+1 < list.list.size()) {
								parent_right_id = list.list.at(idx_opt.value()+1).id;
							}
						}

						const uint64_t tmp_seq {g_seq_inserts[actor]++};
						// the add itself always runs, only the check is debug-only
						[[maybe_unused]] const bool r = list.add(
							{actor, tmp_seq},
							static_cast<const std::string&>(j_op["value"]).front(),
							parent_left_id,
							parent_right_id
						);
						assert(r);

						map_seq[actor][op_seq] = tmp_seq;
						g_total_inserts++;
					} else {
						// i think this is cursor movement
					}
				}
			} else if (j_action == "del") {
				const auto list_id = JObjFromStr(static_cast<const std::string&>(j_op["key"]));
				[[maybe_unused]] const bool r = list.del({list_id.id, map_seq[list_id.id][list_id.seq]});
				assert(r);
				g_total_deletes++;
			} else if (j_action == "makeText") {
				// doc.clear();
			} else if (j_action == "makeMap") {
				// no idea
			} else {
				std::cout << "op: " << j_op << "\n";
			}

			op_seq++;
		}
	}

	std::cout << "\ndoc size (with tombstones): " << list.list.size() << "\n";
	std::cout << "doc size: " << list.doc_size << "\n";
	std::cout << "total inserts: " << g_total_inserts << "\n";
	std::cout << "total deletes: " << g_total_deletes << "\n";
	std::cout << "total ops: " << g_total_inserts + g_total_deletes << "\n";

	// checked, looks correct
#if 0
	std::cout << "doc text:\n";
	// simple print
	for (const auto& it : list.list) {
		if (it.value) {
			std::cout << it.value.value();
		}
	}
	std::cout << "\n";
#endif

	return 0;
}

View File

@ -10,17 +10,17 @@ target_include_directories(crdt_version0 INTERFACE "${PROJECT_SOURCE_DIR}")
######################################## ########################################
add_executable(test1 add_executable(v0_test1
./test1.cpp ./test1.cpp
) )
target_link_libraries(test1 PUBLIC crdt_version0) target_link_libraries(v0_test1 PUBLIC crdt_version0)
######################################## ########################################
add_executable(test2 add_executable(v0_test2
./test2.cpp ./test2.cpp
) )
target_link_libraries(test2 PUBLIC crdt_version0) target_link_libraries(v0_test2 PUBLIC crdt_version0)

View File

@ -8,7 +8,15 @@
#include <cassert> #include <cassert>
namespace GreenCRDT { #if !defined(extra_assert)
#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
#define extra_assert(...) assert(__VA_ARGS__)
#else
#define extra_assert(...) void(0)
#endif
#endif
namespace GreenCRDT::V0 {
template<typename ValueType, typename AgentType> template<typename ValueType, typename AgentType>
struct List { struct List {
@ -71,7 +79,9 @@ struct List {
std::map<AgentType, uint64_t> last_seen_seq; std::map<AgentType, uint64_t> last_seen_seq;
std::optional<size_t> findIdx(const ListID& list_id) const { std::optional<size_t> findIdx(const ListID& list_id) const {
//#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
//verify(); // too expensive //verify(); // too expensive
//#endif
for (size_t i = 0; i < list.size(); i++) { for (size_t i = 0; i < list.size(); i++) {
if (list[i].id == list_id) { if (list[i].id == list_id) {
return i; return i;
@ -243,5 +253,5 @@ struct List {
} }
}; };
} // GreenCRDT } // GreenCRDT::V0

View File

@ -6,7 +6,7 @@
//#include <iostream> // debug //#include <iostream> // debug
namespace GreenCRDT { namespace GreenCRDT::V0 {
template<typename AgentType> template<typename AgentType>
struct TextDocument { struct TextDocument {
@ -281,5 +281,5 @@ struct TextDocument {
} }
}; };
} // GreenCRDT } // GreenCRDT::V0

View File

@ -1,5 +1,5 @@
#include <crdt/list.hpp> #include <green_crdt/v0/list.hpp>
#include <crdt/text_document.hpp> #include <green_crdt/v0/text_document.hpp>
#include <numeric> #include <numeric>
#include <random> #include <random>
@ -8,7 +8,7 @@
// single letter agent, for testing only // single letter agent, for testing only
using Agent = char; using Agent = char;
using DocType = GreenCRDT::TextDocument<Agent>; using DocType = GreenCRDT::V0::TextDocument<Agent>;
using ListType = DocType::ListType; using ListType = DocType::ListType;
void testSingle1(void) { void testSingle1(void) {

View File

@ -1,4 +1,4 @@
#include <crdt/text_document.hpp> #include <green_crdt/v0/text_document.hpp>
#include <numeric> #include <numeric>
#include <optional> #include <optional>
@ -9,8 +9,8 @@
// single letter agent, for testing only // single letter agent, for testing only
using Agent = char; using Agent = char;
using Doc = GreenCRDT::TextDocument<Agent>; using Doc = GreenCRDT::V0::TextDocument<Agent>;
using Op = GreenCRDT::TextDocument<Agent>::Op; using Op = Doc::Op;
using ListType = Doc::ListType; using ListType = Doc::ListType;
// maybe switch it up? // maybe switch it up?

View File

@ -1,4 +1,4 @@
#include <crdt/text_document.hpp> #include <green_crdt/v0/text_document.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
extern "C" { extern "C" {
@ -15,7 +15,7 @@ extern "C" {
// single letter agent, for testing only // single letter agent, for testing only
//using Agent = char; //using Agent = char;
using Agent = uint16_t; // tmp local port using Agent = uint16_t; // tmp local port
using Doc = GreenCRDT::TextDocument<Agent>; using Doc = GreenCRDT::V0::TextDocument<Agent>;
using ListType = Doc::ListType; using ListType = Doc::ListType;
std::ostream& operator<<(std::ostream& out, const std::optional<ListType::ListID>& id) { std::ostream& operator<<(std::ostream& out, const std::optional<ListType::ListID>& id) {

View File

@ -1,4 +1,4 @@
#include <crdt/text_document.hpp> #include <green_crdt/v0/text_document.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
extern "C" { extern "C" {
@ -59,7 +59,7 @@ struct std::hash<ToxPubKey> {
//using Agent = char; //using Agent = char;
//using Agent = uint16_t; // tmp local port //using Agent = uint16_t; // tmp local port
using Agent = ToxPubKey; using Agent = ToxPubKey;
using Doc = GreenCRDT::TextDocument<Agent>; using Doc = GreenCRDT::V0::TextDocument<Agent>;
using ListType = Doc::ListType; using ListType = Doc::ListType;
struct Command { struct Command {
@ -89,7 +89,7 @@ namespace std {
} }
} // namespace std } // namespace std
namespace GreenCRDT { namespace GreenCRDT::V0 {
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(ListType::ListID, NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(ListType::ListID,
id, id,
@ -107,7 +107,7 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(ListType::OpDel,
id id
) )
} // namespace GreenCRDT } // namespace GreenCRDT::V0
// bc variant <.< // bc variant <.<
namespace std { namespace std {
@ -235,7 +235,7 @@ function! GreenCRDTCheckTimeAndFetch()
endif endif
endif endif
let b:green_crdt_fetch_timer = timer_start(503, 'GreenCRDTFetchTimerCallback') let b:green_crdt_fetch_timer = timer_start(203, 'GreenCRDTFetchTimerCallback')
endif endif
endfunction endfunction
)" )"
@ -287,7 +287,7 @@ delfunction GreenCRDTSetupEvents
)" )"
R"( R"(
let b:green_crdt_fetch_timer = timer_start(900, 'GreenCRDTFetchTimerCallback') let b:green_crdt_fetch_timer = timer_start(300, 'GreenCRDTFetchTimerCallback')
echo 'setup done' echo 'setup done'
)"); )");