From 72d00f759d524fd6a4c320e2151e0133f9bc0e4d Mon Sep 17 00:00:00 2001
From: Green Sky <green@g-s.xyz>
Date: Thu, 22 Dec 2022 20:25:23 +0100
Subject: [PATCH] bench v1

---
 bench/CMakeLists.txt |   4 +-
 bench/README.md      |  16 ++--
 bench/v1_jpaper.cpp  | 203 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 215 insertions(+), 8 deletions(-)
 create mode 100644 bench/v1_jpaper.cpp
diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt
index 8062fa8..1476793 100644
--- a/bench/CMakeLists.txt
+++ b/bench/CMakeLists.txt
@@ -15,14 +15,12 @@ target_link_libraries(crdt_bench_jpaper_v0 PUBLIC
 
 ########################################
 
-if (FALSE)
 add_executable(crdt_bench_jpaper_v1
-	./test1.cpp
+	./v1_jpaper.cpp
 )
 
 target_link_libraries(crdt_bench_jpaper_v1 PUBLIC
 	crdt_version1
 	nlohmann_json::nlohmann_json
 )
-endif()
 
diff --git a/bench/README.md b/bench/README.md
index cfe6d3e..e4c6c3e 100644
--- a/bench/README.md
+++ b/bench/README.md
@@ -16,12 +16,12 @@ the json contains:
 ## baseline ( just walking through the json, no insertions )
 
 - g++9 -g :
-	- 23.0s
-	- 22.6s
+	- 23.0s		~11294 ops/s
+	- 22.6s		~11494 ops/s
 	- 23.0s
 
 - g++9 -O3 -DNDEBUG :
-	- 9.6s
+	- 9.6s		~27060 ops/s
 	- 9.7s
 	- 9.7s
 
@@ -29,10 +29,16 @@ the json contains:
 ## version0
 
 - g++9 -g -O2 :
-	- 10m35s
+	- 10m35s	~409 ops/s
 
 - g++9 -O3 -DNDEBUG :
-	- 8m7s
+	- 8m7s		~533 ops/s
 
 ## version1
 
+- g++9 -g -O2 :
+	- 5m23s		~804 ops/s
+
+- g++9 -O3 -DNDEBUG :
+	- 4m7s		~1051 ops/s
+
diff --git a/bench/v1_jpaper.cpp b/bench/v1_jpaper.cpp
new file mode 100644
index 0000000..813a572
--- /dev/null
+++ b/bench/v1_jpaper.cpp
@@ -0,0 +1,203 @@
+#define EXTRA_ASSERTS 1
+
+#include <green_crdt/v1/list.hpp>
+#include <nlohmann/json.hpp>
+
+#include <unordered_map>
+#include <string_view>
+#include <fstream>
+#include <iostream>
+#include <cassert>
+
+using ActorID = std::array<uint8_t, 32>; // 32 raw bytes; ActorIDFromStr below decodes it from 64 hex chars
+//using Doc = GreenCRDT::V0::TextDocument<ActorID>;
+using List = GreenCRDT::V1::List<char, ActorID>; // the v1 list this bench drives: char values, ActorID actors
+
+template<>
+struct std::hash<ActorID> {
+	// Hashes an ActorID by packing its first 8 bytes into a single word,
+	// byte 0 in the lowest 8 bits and byte 7 in the highest.
+	// The remaining 24 bytes of the id do not contribute to the hash.
+	std::size_t operator()(ActorID const& s) const noexcept {
+		static_assert(sizeof(size_t) == 8);
+		// TODO: maybe shuffle the indices a bit
+
+		std::size_t packed {0};
+		for (std::size_t byte_i = 0; byte_i < sizeof(size_t); byte_i++) {
+			packed |= static_cast<std::size_t>(s[byte_i]) << (8 * byte_i);
+		}
+
+		return packed;
+	}
+};
+
+// for dev, benching in debug is useful, but only if the amount of asserts is reasonable
+#if !defined(extra_assert) // a definition provided earlier (e.g. by an included header) wins
+	#if defined(EXTRA_ASSERTS) && EXTRA_ASSERTS == 1
+		#define extra_assert(...) assert(__VA_ARGS__) // enabled: forwards to assert()
+	#else
+		#define extra_assert(...) void(0) // disabled: expands to a no-op expression
+	#endif
+#endif
+
+namespace detail {
+	// Converts one lowercase hex digit ('0'-'9', 'a'-'f') to its 4-bit value.
+	// Any other character trips extra_assert when enabled, otherwise yields 0.
+	uint8_t nib_from_hex(char c) {
+		const bool is_digit = c >= '0' && c <= '9';
+		const bool is_alpha = c >= 'a' && c <= 'f';
+		extra_assert(is_digit || is_alpha);
+
+		if (is_digit) { return static_cast<uint8_t>(c) - '0'; }
+		if (is_alpha) { return (static_cast<uint8_t>(c) - 'a') + 10u; }
+		return 0u;
+	}
+} // detail
+
+// Decodes a 64 character lowercase hex string into the 32 raw bytes of an ActorID.
+static ActorID ActorIDFromStr(std::string_view str) {
+	extra_assert(str.size() == 32*2);
+	ActorID id;
+	for (size_t n = 0; n < 2*id.size(); n += 2) { // two hex digits per byte
+		const uint8_t hi = detail::nib_from_hex(str[n]);
+		id[n/2] = hi << 4 | detail::nib_from_hex(str[n+1]);
+	}
+	return id;
+}
+
+// seq@ID type format used in the json (decimal seq, '@', then the hex encoded actor id)
+struct JObj {
+	ActorID id; // decoded from the hex part after the '@'
+	uint64_t seq {0}; // the decimal number before the '@'
+};
+
+// Parses a "seq@id" reference: decimal seq, then '@', then 64 hex chars of actor id.
+static JObj JObjFromStr(std::string_view str) {
+	extra_assert(str.size() > 32*2 + 1);
+
+	const size_t split = str.find_first_of('@');
+	const std::string_view digits = str.substr(0, split);
+	const std::string_view hex_id = str.substr(split+1);
+	assert(digits.size() != 0);
+	assert(hex_id.size() == 32*2);
+
+	// accumulate the decimal digits, most significant first
+	uint64_t seq {0};
+	for (const char digit : digits) {
+		assert(digit >= '0' && digit <= '9');
+		seq = seq * 10 + (digit - '0');
+	}
+
+	return {ActorIDFromStr(hex_id), seq};
+}
+
+// Replays the edit trace in ../res/paper.json (one json encoded change per line) into a
+// V1 list and prints the resulting sizes and op totals. Returns 1 if the file can't be opened.
+int main(void) {
+	List list;
+
+	std::ifstream file {"../res/paper.json"};
+	if (!file.is_open()) {
+		// otherwise a missing trace silently reports a run over zero ops
+		std::cerr << "failed to open ../res/paper.json\n";
+		return 1;
+	}
+	std::cout << "start reading...\n";
+
+	uint64_t g_total_inserts {0};
+	uint64_t g_total_deletes {0};
+	std::unordered_map<ActorID, uint64_t> g_seq_inserts; // the op seqs are not sequentially growing for inserts, so we sidestep with a per actor counter
+	std::unordered_map<ActorID, std::unordered_map<uint64_t, uint64_t>> map_seq; // maps json op_seq -> list id seq
+
+	for (std::string line; std::getline(file, line); ) {
+		nlohmann::json j_entry = nlohmann::json::parse(line);
+		const ActorID actor = ActorIDFromStr(static_cast<const std::string&>(j_entry["actor"]));
+		uint64_t next_op_seq = j_entry["startOp"];
+		for (const auto& j_op : j_entry["ops"]) {
+			// every op consumes one op seq, including the ones skipped below
+			const uint64_t op_seq = next_op_seq++;
+
+			if (j_op["action"] == "set") {
+				const auto obj = JObjFromStr(static_cast<const std::string&>(j_op["obj"]));
+				if (obj.seq != 1) {
+					// skip all non text edits (create text doc, cursor etc)
+					continue;
+				}
+
+				if (j_op["insert"]) {
+					const auto& j_parent = j_op["key"];
+					extra_assert(!j_parent.is_null());
+
+					// both stay empty when inserting at "_head"
+					std::optional<List::ListID> parent_left_id;
+					std::optional<List::ListID> parent_right_id;
+
+					if (!(j_parent == "_head")) { // we have a parent
+						extra_assert(static_cast<const std::string&>(j_op["value"]).size() == 1);
+
+						// split parent into seq and actor
+						const auto parent_left = JObjFromStr(static_cast<const std::string&>(j_parent));
+						auto idx_opt = list.findIdx({parent_left.id, map_seq[parent_left.id][parent_left.seq]});
+						assert(idx_opt.has_value());
+
+						{
+							const auto& tmp_parent_left_id = list.list.at(idx_opt.value()).id;
+							parent_left_id = {list._actors[tmp_parent_left_id.actor_idx], tmp_parent_left_id.seq};
+						}
+
+						// the right parent is whatever currently follows the left parent, if anything
+						if (idx_opt.value()+1 < list.list.size()) {
+							const auto& tmp_parent_right_id = list.list.at(idx_opt.value()+1).id;
+							parent_right_id = {list._actors[tmp_parent_right_id.actor_idx], tmp_parent_right_id.seq};
+						}
+					}
+
+					uint64_t tmp_seq {g_seq_inserts[actor]++};
+					bool r = list.add(
+						{actor, tmp_seq},
+						static_cast<const std::string&>(j_op["value"]).front(),
+						parent_left_id,
+						parent_right_id
+					);
+					assert(r);
+					map_seq[actor][op_seq] = tmp_seq;
+					g_total_inserts++;
+				} else {
+					// i think this is cursor movement
+				}
+			} else if (j_op["action"] == "del") {
+				const auto list_id = JObjFromStr(static_cast<const std::string&>(j_op["key"]));
+				bool r = list.del({list_id.id, map_seq[list_id.id][list_id.seq]});
+				assert(r);
+				g_total_deletes++;
+			} else if (j_op["action"] == "makeText") {
+				// doc.clear();
+			} else if (j_op["action"] == "makeMap") {
+				// no idea
+			} else {
+				std::cout << "op: " << j_op << "\n";
+			}
+		}
+	}
+
+	std::cout << "\ndoc size (with tombstones): " << list.list.size() << "\n";
+	std::cout << "doc size: " << list.doc_size << "\n";
+	std::cout << "total inserts: " << g_total_inserts << "\n";
+	std::cout << "total deletes: " << g_total_deletes << "\n";
+	std::cout << "total ops: " << g_total_inserts + g_total_deletes << "\n";
+
+	// checked, looks correct
+#if 0
+	std::cout << "doc text:\n";
+	// simple print
+	for (const auto& it : list.list) {
+		if (it.value) {
+			std::cout << it.value.value();
+		}
+	}
+	std::cout << "\n";
+#endif
+
+	return 0;
+}
+