From dfbb1dea6821927051a690e96ae4ffea2a91d971 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Fri, 12 Apr 2024 13:34:20 +0200 Subject: [PATCH] move os and backend to sub --- .gitmodules | 3 + external/CMakeLists.txt | 24 +- external/solanaceae_object_store | 1 + src/CMakeLists.txt | 58 +- .../backends/filesystem_storage.cpp | 627 ------------------ .../backends/filesystem_storage.hpp | 40 -- src/fragment_store/convert_frag_to_obj.cpp | 8 +- src/fragment_store/file2_stack.cpp | 93 --- src/fragment_store/file2_stack.hpp | 23 - src/fragment_store/file2_zstd.cpp | 209 ------ src/fragment_store/file2_zstd.hpp | 51 -- src/fragment_store/message_fragment_store.cpp | 2 +- src/fragment_store/message_fragment_store.hpp | 4 +- .../messages_meta_components.hpp | 2 +- src/fragment_store/meta_components.hpp | 77 --- src/fragment_store/meta_components_id.inl | 30 - src/fragment_store/object_store.cpp | 140 ---- src/fragment_store/object_store.hpp | 95 --- src/fragment_store/serializer_json.hpp | 67 -- src/fragment_store/test_file_zstd.cpp | 394 ----------- src/fragment_store/types.hpp | 19 - src/main_screen.hpp | 4 +- 22 files changed, 28 insertions(+), 1943 deletions(-) create mode 160000 external/solanaceae_object_store delete mode 100644 src/fragment_store/backends/filesystem_storage.cpp delete mode 100644 src/fragment_store/backends/filesystem_storage.hpp delete mode 100644 src/fragment_store/file2_stack.cpp delete mode 100644 src/fragment_store/file2_stack.hpp delete mode 100644 src/fragment_store/file2_zstd.cpp delete mode 100644 src/fragment_store/file2_zstd.hpp delete mode 100644 src/fragment_store/meta_components.hpp delete mode 100644 src/fragment_store/meta_components_id.inl delete mode 100644 src/fragment_store/object_store.cpp delete mode 100644 src/fragment_store/object_store.hpp delete mode 100644 src/fragment_store/serializer_json.hpp delete mode 100644 src/fragment_store/test_file_zstd.cpp delete mode 100644 src/fragment_store/types.hpp diff --git a/.gitmodules b/.gitmodules index 06f552c..4d17134 100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,3 +20,6 @@ [submodule "external/solanaceae_plugin"] path = external/solanaceae_plugin url = https://github.com/Green-Sky/solanaceae_plugin.git +[submodule "external/solanaceae_object_store"] + path = external/solanaceae_object_store + url = https://github.com/Green-Sky/solanaceae_object_store.git diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index a7066be..5019deb 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -12,6 +12,8 @@ add_subdirectory(./toxcore) add_subdirectory(./solanaceae_toxcore) add_subdirectory(./solanaceae_tox) +add_subdirectory(./solanaceae_object_store) + add_subdirectory(./sdl) add_subdirectory(./imgui) @@ -28,25 +30,3 @@ if (NOT TARGET nlohmann_json::nlohmann_json) FetchContent_MakeAvailable(json) endif() -if (NOT TARGET zstd::zstd) - # TODO: try find_package() first - # TODO: try pkg-config next (will work on most distros) - - set(ZSTD_BUILD_STATIC ON) - set(ZSTD_BUILD_SHARED OFF) - set(ZSTD_BUILD_PROGRAMS OFF) - set(ZSTD_BUILD_CONTRIB OFF) - set(ZSTD_BUILD_TESTS OFF) - FetchContent_Declare(zstd - URL "https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz" - DOWNLOAD_EXTRACT_TIMESTAMP TRUE - SOURCE_SUBDIR build/cmake - EXCLUDE_FROM_ALL - ) - FetchContent_MakeAvailable(zstd) - - add_library(zstd INTERFACE) # somehow zstd fkd this up - target_include_directories(zstd INTERFACE ${zstd_SOURCE_DIR}/lib/) - target_link_libraries(zstd INTERFACE libzstd_static) - add_library(zstd::zstd ALIAS zstd) -endif() diff --git a/external/solanaceae_object_store b/external/solanaceae_object_store new file mode 160000 index 0000000..4d3ffb8 --- /dev/null +++ b/external/solanaceae_object_store @@ -0,0 +1 @@ +Subproject commit 4d3ffb8192623740f6e170855ee1cffd428b78da diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 390ea26..7715e3e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,51 +1,14 @@ cmake_minimum_required(VERSION 3.9 FATAL_ERROR) -add_library(fragment_store - ./fragment_store/file2_zstd.hpp - ./fragment_store/file2_zstd.cpp - - ./fragment_store/uuid_generator.hpp - ./fragment_store/uuid_generator.cpp - - ./fragment_store/types.hpp - ./fragment_store/meta_components.hpp - ./fragment_store/meta_components_id.inl - ./fragment_store/file2_stack.hpp - ./fragment_store/file2_stack.cpp - ./fragment_store/serializer_json.hpp - ./fragment_store/object_store.hpp - ./fragment_store/object_store.cpp - ./fragment_store/backends/filesystem_storage.hpp - ./fragment_store/backends/filesystem_storage.cpp - - ./json/message_components.hpp # TODO: move - ./json/tox_message_components.hpp # TODO: move -) - -target_link_libraries(fragment_store PUBLIC - nlohmann_json::nlohmann_json - EnTT::EnTT - solanaceae_util - - solanaceae_file2 - zstd::zstd - - solanaceae_tox_messages # TODO: move -) - -######################################## - -add_executable(test_file_zstd - fragment_store/test_file_zstd.cpp -) - -target_link_libraries(test_file_zstd PUBLIC - fragment_store -) - ######################################## add_library(message_fragment_store + ./fragment_store/uuid_generator.hpp + ./fragment_store/uuid_generator.cpp + + ./json/message_components.hpp # TODO: move + ./json/tox_message_components.hpp # TODO: move + ./fragment_store/message_serializer.hpp ./fragment_store/message_serializer.cpp ./fragment_store/messages_meta_components.hpp @@ -60,8 +23,9 @@ add_library(message_fragment_store ) target_compile_features(message_fragment_store PRIVATE cxx_std_20) target_link_libraries(message_fragment_store PUBLIC - fragment_store + solanaceae_object_store solanaceae_message3 + solanaceae_tox_messages # TODO: move ) ######################################## @@ -71,7 +35,8 @@ add_executable(convert_message_object_store ) target_link_libraries(convert_message_object_store PUBLIC - fragment_store + solanaceae_object_store + solanaceae_object_store_backend_filesystem message_fragment_store ) @@ -158,7 +123,8 @@ target_link_libraries(tomato PUBLIC solanaceae_tox_contacts solanaceae_tox_messages - fragment_store + solanaceae_object_store + solanaceae_object_store_backend_filesystem message_fragment_store SDL3::SDL3 diff --git a/src/fragment_store/backends/filesystem_storage.cpp b/src/fragment_store/backends/filesystem_storage.cpp deleted file mode 100644 index 4a13663..0000000 --- a/src/fragment_store/backends/filesystem_storage.cpp +++ /dev/null @@ -1,627 +0,0 @@ -#include "./filesystem_storage.hpp" - -#include "../meta_components.hpp" -#include "../serializer_json.hpp" - -#include - -#include -#include - -#include -#include - -#include "../file2_stack.hpp" - -#include - -#include - -static const char* metaFileTypeSuffix(MetaFileType mft) { - switch (mft) { - case MetaFileType::TEXT_JSON: return ".json"; - //case MetaFileType::BINARY_ARB: return ".bin"; - case MetaFileType::BINARY_MSGPACK: return ".msgpack"; - } - return ""; // .unk? -} - -// TODO: move to ... somewhere. (span? file2i?) -static ByteSpan spanFromRead(const std::variant>& data_var) { - if (std::holds_alternative>(data_var)) { - auto& vec = std::get>(data_var); - return {vec.data(), vec.size()}; - } else if (std::holds_alternative(data_var)) { - return std::get(data_var); - } else { - assert(false); - return {}; - } -} - - -namespace backend { - -FilesystemStorage::FilesystemStorage( - ObjectStore2& os, - std::string_view storage_path, - MetaFileType mft_new -) : StorageBackendI::StorageBackendI(os), _storage_path(storage_path), _mft_new(mft_new) { -} - -FilesystemStorage::~FilesystemStorage(void) { -} - -ObjectHandle FilesystemStorage::newObject(ByteSpan id) { - { // first check if id is already used (TODO: solve the multi obj/backend problem) - auto exising_oh = _os.getOneObjectByID(id); - if (static_cast(exising_oh)) { - return {}; - } - } - - std::filesystem::create_directories(_storage_path); - - if (!std::filesystem::is_directory(_storage_path)) { - std::cerr << "FS error: failed to create storage path dir '" << _storage_path << "'\n"; - return {}; - } - - const auto id_hex = bin2hex(id); - std::filesystem::path object_file_path; - - // TODO: refactor this magic somehow - if (id_hex.size() < 6) { - object_file_path = std::filesystem::path{_storage_path}/id_hex; - } else { - // use the first 2hex (1byte) as a subfolder - std::filesystem::create_directories(std::string{_storage_path} + id_hex.substr(0, 2)); - object_file_path = std::filesystem::path{std::string{_storage_path} + id_hex.substr(0, 2)} / id_hex.substr(2); - } - - if (std::filesystem::exists(object_file_path)) { - std::cerr << "FS error: object already exists in path '" << id_hex << "'\n"; - return {}; - } - - ObjectHandle oh{_os.registry(), _os.registry().create()}; - - oh.emplace(this); - oh.emplace(std::vector{id}); - oh.emplace(object_file_path.generic_u8string()); - oh.emplace(_mft_new); - - // meta needs to be synced to file - std::function empty_data_cb = [](auto*, auto) -> uint64_t { return 0; }; - if (!write(oh, empty_data_cb)) { - std::cerr << "FS error: write failed while creating new object file\n"; - oh.destroy(); - return {}; - } - - // while new metadata might be created here, making sure the file could be created is more important - _os.throwEventConstruct(oh); - - return oh; -} - -bool FilesystemStorage::write(Object o, std::function& data_cb) { - auto& reg = _os.registry(); - - if (!reg.valid(o)) { - return false; - } - - ObjectHandle oh {reg, o}; - - if (!oh.all_of()) { - // not a file fragment? - return false; - } - - // split object storage (meta and data are 2 files) - - MetaFileType meta_type = MetaFileType::TEXT_JSON; // TODO: better defaults? - if (reg.all_of(o)) { - meta_type = oh.get().type; - } - - Encryption meta_enc = Encryption::NONE; // TODO: better defaults? - Compression meta_comp = Compression::NONE; // TODO: better defaults? - - if (meta_type != MetaFileType::TEXT_JSON) { - if (oh.all_of()) { - meta_enc = oh.get().enc; - } - - if (oh.all_of()) { - meta_comp = oh.get().comp; - } - } else { - // we cant have encryption or compression - // so we force NONE for TEXT JSON - - oh.emplace_or_replace(Encryption::NONE); - oh.emplace_or_replace(Compression::NONE); - } - - std::filesystem::path meta_tmp_path = oh.get().path + ".meta" + metaFileTypeSuffix(meta_type) + ".tmp"; - meta_tmp_path.replace_filename("." + meta_tmp_path.filename().generic_u8string()); - // TODO: make meta comp work with mem compressor - //auto meta_file_stack = buildFileStackWrite(std::string_view{meta_tmp_path.generic_u8string()}, meta_enc, meta_comp); - std::stack> meta_file_stack; - meta_file_stack.push(std::make_unique(std::string_view{meta_tmp_path.generic_u8string()})); - - if (meta_file_stack.empty()) { - std::cerr << "FS error: failed to create temporary meta file stack\n"; - std::filesystem::remove(meta_tmp_path); // might have created an empty file - return false; - } - - Encryption data_enc = Encryption::NONE; // TODO: better defaults - Compression data_comp = Compression::NONE; // TODO: better defaults - if (oh.all_of()) { - data_enc = oh.get().enc; - } - if (oh.all_of()) { - data_comp = oh.get().comp; - } - - std::filesystem::path data_tmp_path = oh.get().path + ".tmp"; - data_tmp_path.replace_filename("." + data_tmp_path.filename().generic_u8string()); - auto data_file_stack = buildFileStackWrite(std::string_view{data_tmp_path.generic_u8string()}, data_enc, data_comp); - if (data_file_stack.empty()) { - while (!meta_file_stack.empty()) { meta_file_stack.pop(); } - std::filesystem::remove(meta_tmp_path); - std::cerr << "FS error: failed to create temporary data file stack\n"; - return false; - } - - try { // TODO: properly sanitize strings, so this cant throw - // sharing code between binary msgpack and text json for now - nlohmann::json meta_data_j = nlohmann::json::object(); // metadata needs to be an object, null not allowed - // metadata file - - auto& sjc = _os.registry().ctx().get>(); - - // TODO: refactor extract to OS - for (const auto& [type_id, storage] : reg.storage()) { - if (!storage.contains(o)) { - continue; - } - - //std::cout << "storage type: type_id:" << type_id << " name:" << storage.type().name() << "\n"; - - // use type_id to find serializer - auto s_cb_it = sjc._serl.find(type_id); - if (s_cb_it == sjc._serl.end()) { - // could not find serializer, not saving - continue; - } - - // noooo, why cant numbers be keys - //if (meta_type == MetaFileType::BINARY_MSGPACK) { // msgpack uses the hash id instead - //s_cb_it->second(storage.value(fid), meta_data[storage.type().hash()]); - //} else if (meta_type == MetaFileType::TEXT_JSON) { - s_cb_it->second(oh, meta_data_j[storage.type().name()]); - //} - } - - if (meta_type == MetaFileType::BINARY_MSGPACK) { // binary metadata file - std::vector binary_meta_data; - { - std::stack> binary_writer_stack; - binary_writer_stack.push(std::make_unique(binary_meta_data)); - - if (!buildStackWrite(binary_writer_stack, meta_enc, meta_comp)) { - while (!meta_file_stack.empty()) { meta_file_stack.pop(); } - std::filesystem::remove(meta_tmp_path); - while (!data_file_stack.empty()) { data_file_stack.pop(); } - std::filesystem::remove(data_tmp_path); - std::cerr << "FS error: binary writer creation failed '" << oh.get().path << "'\n"; - return false; - } - - { - const std::vector meta_data = nlohmann::json::to_msgpack(meta_data_j); - if (!binary_writer_stack.top()->write(ByteSpan{meta_data})) { - // i feel like exceptions or refactoring would be nice here - while (!meta_file_stack.empty()) { meta_file_stack.pop(); } - std::filesystem::remove(meta_tmp_path); - while (!data_file_stack.empty()) { data_file_stack.pop(); } - std::filesystem::remove(data_tmp_path); - std::cerr << "FS error: binary writer failed '" << oh.get().path << "'\n"; - return false; - } - } - } - - //// the meta file is itself msgpack data - nlohmann::json meta_header_j = nlohmann::json::array(); - meta_header_j.emplace_back() = "SOLMET"; - meta_header_j.push_back(meta_enc); - meta_header_j.push_back(meta_comp); - - // with a custom msgpack impl like cmp, we can be smarter here and dont need an extra buffer - meta_header_j.push_back(nlohmann::json::binary(binary_meta_data)); - - const auto meta_header_data = nlohmann::json::to_msgpack(meta_header_j); - meta_file_stack.top()->write({meta_header_data.data(), meta_header_data.size()}); - } else if (meta_type == MetaFileType::TEXT_JSON) { - // cant be compressed or encrypted - const auto meta_file_json_str = meta_data_j.dump(2, ' ', true); - meta_file_stack.top()->write({reinterpret_cast(meta_file_json_str.data()), meta_file_json_str.size()}); - } - - } catch (...) { - while (!meta_file_stack.empty()) { meta_file_stack.pop(); } // destroy stack // TODO: maybe work with scope? - std::filesystem::remove(meta_tmp_path); - while (!data_file_stack.empty()) { data_file_stack.pop(); } // destroy stack // TODO: maybe work with scope? - std::filesystem::remove(data_tmp_path); - std::cerr << "FS error: failed to serialize json data\n"; - return false; - } - - // now data - // for zstd compression, chunk size is frame size. (no cross frame referencing) - // TODO: add buffering steam layer - static constexpr int64_t chunk_size{1024*1024}; // 1MiB should be enough - std::vector buffer(chunk_size); - uint64_t buffer_actual_size {0}; - do { - buffer_actual_size = data_cb(buffer.data(), buffer.size()); - if (buffer_actual_size == 0) { - break; - } - if (buffer_actual_size > buffer.size()) { - // wtf - break; - } - - data_file_stack.top()->write({buffer.data(), buffer_actual_size}); - } while (buffer_actual_size == buffer.size()); - - // flush // TODO: use scope - while (!meta_file_stack.empty()) { meta_file_stack.pop(); } // destroy stack // TODO: maybe work with scope? - while (!data_file_stack.empty()) { data_file_stack.pop(); } // destroy stack // TODO: maybe work with scope? - - std::filesystem::rename( - meta_tmp_path, - oh.get().path + ".meta" + metaFileTypeSuffix(meta_type) - ); - - std::filesystem::rename( - data_tmp_path, - oh.get().path - ); - - // TODO: check return value of renames - - if (oh.all_of()) { - oh.remove(); - } - - return true; -} - -bool FilesystemStorage::read(Object o, std::function& data_cb) { - auto& reg = _os.registry(); - - if (!reg.valid(o)) { - return false; - } - - ObjectHandle oh {reg, o}; - - if (!oh.all_of()) { - // not a file - return false; - } - - const auto& obj_path = oh.get().path; - - // TODO: check if metadata dirty? - // TODO: what if file changed on disk? - - std::cout << "FS: loading fragment '" << obj_path << "'\n"; - - Compression data_comp = Compression::NONE; - if (oh.all_of()) { - data_comp = oh.get().comp; - } - - auto data_file_stack = buildFileStackRead(std::string_view{obj_path}, Encryption::NONE, data_comp); - if (data_file_stack.empty()) { - return false; - } - - // TODO: make it read in a single chunk instead? - static constexpr int64_t chunk_size {1024 * 1024}; // 1MiB should be good for read - do { - auto data_var = data_file_stack.top()->read(chunk_size); - ByteSpan data = spanFromRead(data_var); - - if (data.empty()) { - // error or probably eof - break; - } - - data_cb(data); - - if (data.size < chunk_size) { - // eof - break; - } - } while (data_file_stack.top()->isGood()); - - return true; -} - -void FilesystemStorage::scanAsync(void) { - scanPathAsync(_storage_path); -} - -size_t FilesystemStorage::scanPath(std::string_view path) { - // TODO: extract so async can work (or/and make iteratable generator) - - if (path.empty() || !std::filesystem::is_directory(path)) { - std::cerr << "FS error: scan path not a directory '" << path << "'\n"; - return 0; - } - - // step 1: make snapshot of files, validate metafiles and save id/path+meta.ext - // can be extra thread (if non vfs) - struct ObjFileEntry { - std::string id_str; - std::filesystem::path obj_path; - std::string meta_ext; - - bool operator==(const ObjFileEntry& other) const { - // only compare by id - return id_str == other.id_str; - } - }; - struct ObjFileEntryHash { - size_t operator()(const ObjFileEntry& it) const { - return entt::hashed_string(it.id_str.data(), it.id_str.size()); - } - }; - entt::dense_set file_obj_list; - - std::filesystem::path storage_path{path}; - - auto handle_file = [&](const std::filesystem::path& file_path) { - if (!std::filesystem::is_regular_file(file_path)) { - return; - } - // handle file - - if (file_path.has_extension()) { - // skip over metadata, assuming only metafiles have extentions (might be wrong?) - // also skips temps - return; - } - - auto relative_path = std::filesystem::proximate(file_path, storage_path); - std::string id_str = relative_path.generic_u8string(); - // delete all '/' - id_str.erase(std::remove(id_str.begin(), id_str.end(), '/'), id_str.end()); - if (id_str.size() % 2 != 0) { - std::cerr << "FS error: non hex object uid detected: '" << id_str << "'\n"; - } - - if (file_obj_list.contains(ObjFileEntry{id_str, {}, ""})) { - std::cerr << "FS error: object duplicate detected: '" << id_str << "'\n"; - return; // skip - } - - const char* meta_ext = ".meta.msgpack"; - { // find meta - // TODO: this as to know all possible extentions - bool has_meta_msgpack = std::filesystem::is_regular_file(file_path.generic_u8string() + ".meta.msgpack"); - bool has_meta_json = std::filesystem::is_regular_file(file_path.generic_u8string() + ".meta.json"); - const size_t meta_sum = - (has_meta_msgpack?1:0) + - (has_meta_json?1:0) - ; - - if (meta_sum > 1) { // has multiple - std::cerr << "FS error: object with multiple meta files detected: " << id_str << "\n"; - return; // skip - } - - if (meta_sum == 0) { - std::cerr << "FS error: object missing meta file detected: " << id_str << "\n"; - return; // skip - } - - if (has_meta_json) { - meta_ext = ".meta.json"; - } - } - - file_obj_list.emplace(ObjFileEntry{ - std::move(id_str), - file_path, - meta_ext - }); - }; - - for (const auto& outer_path : std::filesystem::directory_iterator(storage_path)) { - if (std::filesystem::is_regular_file(outer_path)) { - handle_file(outer_path); - } else if (std::filesystem::is_directory(outer_path)) { - // subdir, part of id - for (const auto& inner_path : std::filesystem::directory_iterator(outer_path)) { - //if (std::filesystem::is_regular_file(inner_path)) { - - //// handle file - //} // TODO: support deeper recursion? - handle_file(inner_path); - } - } - } - - std::cout << "FS: scan found:\n"; - for (const auto& it : file_obj_list) { - std::cout << " " << it.id_str << "\n"; - } - - // step 2: check if files preexist in reg - // main thread - // (merge into step 3 and do more error checking?) - // TODO: properly handle duplicates, dups form different backends might be legit - // important - for (auto it = file_obj_list.begin(); it != file_obj_list.end();) { - auto id = hex2bin(it->id_str); - auto oh = _os.getOneObjectByID(ByteSpan{id}); - if (static_cast(oh)) { - // pre exising (handle differently??) - // check if store differs? - it = file_obj_list.erase(it); - } else { - it++; - } - } - - auto& sjc = _os.registry().ctx().get>(); - - std::vector scanned_objs; - // step 3: parse meta and insert into reg of non preexising - // main thread - // TODO: check timestamps of preexisting and reload? mark external/remote dirty? - for (const auto& it : file_obj_list) { - MetaFileType mft {MetaFileType::TEXT_JSON}; - Encryption meta_enc {Encryption::NONE}; - Compression meta_comp {Compression::NONE}; - nlohmann::json j; - if (it.meta_ext == ".meta.msgpack") { - std::ifstream file(it.obj_path.generic_u8string() + it.meta_ext, std::ios::in | std::ios::binary); - if (!file.is_open()) { - std::cout << "FS error: failed opening meta " << it.obj_path << "\n"; - continue; - } - - mft = MetaFileType::BINARY_MSGPACK; - - // file is a msgpack within a msgpack - - std::vector full_meta_data; - { // read meta file - // figure out size - file.seekg(0, file.end); - uint64_t file_size = file.tellg(); - file.seekg(0, file.beg); - - full_meta_data.resize(file_size); - - file.read(reinterpret_cast(full_meta_data.data()), full_meta_data.size()); - } - - const auto meta_header_j = nlohmann::json::from_msgpack(full_meta_data, true, false); - - if (!meta_header_j.is_array() || meta_header_j.size() < 4) { - std::cerr << "FS error: broken binary meta " << it.obj_path << "\n"; - continue; - } - - if (meta_header_j.at(0) != "SOLMET") { - std::cerr << "FS error: wrong magic '" << meta_header_j.at(0) << "' in meta " << it.obj_path << "\n"; - continue; - } - - meta_enc = meta_header_j.at(1); - if (meta_enc != Encryption::NONE) { - std::cerr << "FS error: unknown encryption " << it.obj_path << "\n"; - continue; - } - - meta_comp = meta_header_j.at(2); - if (meta_comp != Compression::NONE && meta_comp != Compression::ZSTD) { - std::cerr << "FS error: unknown compression " << it.obj_path << "\n"; - continue; - } - - //const auto& meta_data_ref = meta_header_j.at(3).is_binary()?meta_header_j.at(3):meta_header_j.at(3).at("data"); - if (!meta_header_j.at(3).is_binary()) { - std::cerr << "FS error: meta data not binary " << it.obj_path << "\n"; - continue; - } - const nlohmann::json::binary_t& meta_data_ref = meta_header_j.at(3); - - std::stack> binary_reader_stack; - binary_reader_stack.push(std::make_unique(ByteSpan{meta_data_ref.data(), meta_data_ref.size()})); - - if (!buildStackRead(binary_reader_stack, meta_enc, meta_comp)) { - std::cerr << "FS error: binary reader creation failed " << it.obj_path << "\n"; - continue; - } - - // HACK: read fixed amout of data, but this way if we have neither enc nor comp we pass the span through - auto binary_read_value = binary_reader_stack.top()->read(10*1024*1024); // is 10MiB large enough for meta? - const auto binary_read_span = spanFromRead(binary_read_value); - assert(binary_read_span.size < 10*1024*1024); - - j = nlohmann::json::from_msgpack(binary_read_span, true, false); - } else if (it.meta_ext == ".meta.json") { - std::ifstream file(it.obj_path.generic_u8string() + it.meta_ext, std::ios::in | std::ios::binary); - if (!file.is_open()) { - std::cerr << "FS error: failed opening meta " << it.obj_path << "\n"; - continue; - } - - mft = MetaFileType::TEXT_JSON; - - file >> j; - } else { - assert(false); - } - - if (!j.is_object()) { - std::cerr << "FS error: json in meta is broken " << it.id_str << "\n"; - continue; - } - - // TODO: existing fragment file - //newFragmentFile(); - ObjectHandle oh{_os.registry(), _os.registry().create()}; - oh.emplace(this); - oh.emplace(hex2bin(it.id_str)); - oh.emplace(mft); - oh.emplace(meta_enc); - oh.emplace(meta_comp); - - oh.emplace(it.obj_path.generic_u8string()); - - for (const auto& [k, v] : j.items()) { - // type id from string hash - const auto type_id = entt::hashed_string(k.data(), k.size()); - const auto deserl_fn_it = sjc._deserl.find(type_id); - if (deserl_fn_it != sjc._deserl.cend()) { - // TODO: check return value - deserl_fn_it->second(oh, v); - } else { - std::cerr << "FS warning: missing deserializer for meta key '" << k << "'\n"; - } - } - scanned_objs.push_back(oh); - } - - // TODO: mutex and move code to async and return this list ? - - // throw new frag event here, after loading them all - for (const Object o : scanned_objs) { - _os.throwEventConstruct(o); - } - - return scanned_objs.size(); -} - -void FilesystemStorage::scanPathAsync(std::string path) { - // add path to queue - // HACK: if path is known/any fragment is in the path, this operation blocks (non async) - scanPath(path); // TODO: make async and post result -} - -} // backend - diff --git a/src/fragment_store/backends/filesystem_storage.hpp b/src/fragment_store/backends/filesystem_storage.hpp deleted file mode 100644 index b8ddafd..0000000 --- a/src/fragment_store/backends/filesystem_storage.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "../types.hpp" -#include "../object_store.hpp" - -#include - -namespace backend { - -struct FilesystemStorage : public StorageBackendI { - FilesystemStorage( - ObjectStore2& os, - std::string_view storage_path = "test_obj_store", - MetaFileType mft_new = MetaFileType::BINARY_MSGPACK - ); - ~FilesystemStorage(void); - - // TODO: fix the path for this specific fs? - // for now we assume a single storage path per backend (there can be multiple per type) - std::string _storage_path; - - // meta file type for new objects - MetaFileType _mft_new {MetaFileType::BINARY_MSGPACK}; - - ObjectHandle newObject(ByteSpan id) override; - - bool write(Object o, std::function& data_cb) override; - bool read(Object o, std::function& data_cb) override; - - //// convenience function - //nlohmann::json loadFromStorageNJ(FragmentID fid); - - void scanAsync(void); - - private: - size_t scanPath(std::string_view path); - void scanPathAsync(std::string path); -}; - -} // backend diff --git a/src/fragment_store/convert_frag_to_obj.cpp b/src/fragment_store/convert_frag_to_obj.cpp index 41a69bd..d7520c7 100644 --- a/src/fragment_store/convert_frag_to_obj.cpp +++ b/src/fragment_store/convert_frag_to_obj.cpp @@ -1,7 +1,7 @@ -#include "./object_store.hpp" -#include "./backends/filesystem_storage.hpp" -#include "./meta_components.hpp" -#include "./serializer_json.hpp" +#include +#include +#include +#include #include "./message_fragment_store.hpp" #include diff --git a/src/fragment_store/file2_stack.cpp b/src/fragment_store/file2_stack.cpp deleted file mode 100644 index 70b599a..0000000 --- a/src/fragment_store/file2_stack.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include "./file2_stack.hpp" - -#include -#include "./file2_zstd.hpp" - -#include - -#include - -// add enc and comp file layers -// assumes a file is already in the stack -bool buildStackRead(std::stack>& file_stack, Encryption encryption, Compression compression) { - assert(!file_stack.empty()); - - // TODO: decrypt here - assert(encryption == Encryption::NONE); - - // add layer based on enum - if (compression == Compression::ZSTD) { - file_stack.push(std::make_unique(*file_stack.top().get())); - } else { - // TODO: make error instead - assert(compression == Compression::NONE); - } - - if (!file_stack.top()->isGood()) { - std::cerr << "FS error: file failed to add " << (int)compression << " decompression layer\n"; - return false; - } - - return true; -} - -// do i need this? -std::stack> buildFileStackRead(std::string_view file_path, Encryption encryption, Compression compression) { - std::stack> file_stack; - file_stack.push(std::make_unique(file_path)); - - if (!file_stack.top()->isGood()) { - std::cerr << "FS error: opening file for reading '" << file_path << "'\n"; - return {}; - } - - if (!buildStackRead(file_stack, encryption, compression)) { - std::cerr << "FS error: file failed to add layers for '" << file_path << "'\n"; - return {}; - } - - return file_stack; -} - -// add enc and comp file layers -// assumes a file is already in the stack -bool buildStackWrite(std::stack>& file_stack, Encryption encryption, Compression compression) { - assert(!file_stack.empty()); - - // TODO: encrypt here - assert(encryption == Encryption::NONE); - - // add layer based on enum - if (compression == Compression::ZSTD) { - file_stack.push(std::make_unique(*file_stack.top().get())); - } else { - // TODO: make error instead - assert(compression == Compression::NONE); - } - - if (!file_stack.top()->isGood()) { - std::cerr << "FS error: file failed to add " << (int)compression << " compression layer\n"; - return false; - } - - return true; -} - -// do i need this? -std::stack> buildFileStackWrite(std::string_view file_path, Encryption encryption, Compression compression) { - std::stack> file_stack; - file_stack.push(std::make_unique(file_path)); - - if (!file_stack.top()->isGood()) { - std::cerr << "FS error: opening file for writing '" << file_path << "'\n"; - return {}; - } - - if (!buildStackWrite(file_stack, encryption, compression)) { - std::cerr << "FS error: file failed to add layers for '" << file_path << "'\n"; - return {}; - } - - return file_stack; -} - diff --git a/src/fragment_store/file2_stack.hpp b/src/fragment_store/file2_stack.hpp deleted file mode 100644 index c1e878c..0000000 --- a/src/fragment_store/file2_stack.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include "./types.hpp" - -#include - -#include -#include -#include - -// add enc and comp file layers -// assumes a file is already in the stack -[[nodiscard]] bool buildStackRead(std::stack>& file_stack, Encryption encryption, Compression compression); - -// do i need this? -[[nodiscard]] std::stack> buildFileStackRead(std::string_view file_path, Encryption encryption, Compression compression); - -// add enc and comp file layers -// assumes a file is already in the stack -[[nodiscard]] bool buildStackWrite(std::stack>& file_stack, Encryption encryption, Compression compression); - -// do i need this? -[[nodiscard]] std::stack> buildFileStackWrite(std::string_view file_path, Encryption encryption, Compression compression); diff --git a/src/fragment_store/file2_zstd.cpp b/src/fragment_store/file2_zstd.cpp deleted file mode 100644 index f3ba0ab..0000000 --- a/src/fragment_store/file2_zstd.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include "./file2_zstd.hpp" - -#include -#include -#include -#include - -#include - -File2ZSTDW::File2ZSTDW(File2I& real) : - File2I(true, false), - _real_file(real) -{ - ZSTD_CCtx_setParameter(_cctx.get(), ZSTD_c_compressionLevel, 0); // default (3) - ZSTD_CCtx_setParameter(_cctx.get(), ZSTD_c_checksumFlag, 1); // add extra checksums (to frames?) -} - -File2ZSTDW::~File2ZSTDW(void) { - // flush remaining data (and maybe header) - // actually nvm, write will always flush all data, so only on empty files this would be an issue -} - -bool File2ZSTDW::isGood(void) { - return _real_file.isGood(); -} - -bool File2ZSTDW::write(const ByteSpan data, int64_t pos) { - if (pos != -1) { - return false; - } - - if (data.empty()) { - return false; // return true? - } - - if (data.size < 16) { - std::cout << "F2ZSTD warning: each write is a zstd frame and compression suffers significantly for small frames.\n"; - } - - std::vector compressed_buffer(ZSTD_CStreamOutSize()); - - ZSTD_inBuffer input = { data.ptr, data.size, 0 }; - - size_t remaining_ret {0}; - do { - // remaining data in input < compressed_buffer size (heuristic) - bool const lastChunk = (input.size - input.pos) <= compressed_buffer.size(); - - ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; - - ZSTD_outBuffer output = { compressed_buffer.data(), compressed_buffer.size(), 0 }; - - remaining_ret = ZSTD_compressStream2(_cctx.get(), &output , &input, mode); - if (ZSTD_isError(remaining_ret)) { - std::cerr << "F2WRZSTD error: compressing data failed\n"; - break; - } - - _real_file.write(ByteSpan{compressed_buffer.data(), output.pos}); - } while ((input.pos < input.size || remaining_ret != 0) && _real_file.isGood()); - - return _real_file.isGood(); -} - -std::variant> File2ZSTDW::read(uint64_t, int64_t) { - return {}; -} - -// ######################################### decompression - -File2ZSTDR::File2ZSTDR(File2I& real) : - File2I(false, true), - _real_file(real), - - // 64kib - _in_buffer(ZSTD_DStreamInSize()), - _out_buffer(ZSTD_DStreamOutSize()) -{ -} - -bool File2ZSTDR::isGood(void) { - return _real_file.isGood(); -} - -bool File2ZSTDR::write(const ByteSpan, int64_t) { - return false; -} - -std::variant> File2ZSTDR::read(uint64_t size, int64_t pos) { - if (pos != -1) { - // error, only support streaming (for now) - return {}; - } - - std::vector ret_data; - - // actually first we check previous data - if (!_decompressed_buffer.empty()) { - uint64_t required_size = std::min(size, _decompressed_buffer.size()); - ret_data.insert(ret_data.end(), _decompressed_buffer.cbegin(), _decompressed_buffer.cbegin() + required_size); - _decompressed_buffer.erase(_decompressed_buffer.cbegin(), _decompressed_buffer.cbegin() + required_size); - } - - bool eof {false}; - // outerloop here - while (ret_data.size() < size && !eof) { - // first make sure we have data in input - if (_z_input.src == nullptr || _z_input.pos == _z_input.size) { - const auto request_size = _in_buffer.size(); - if (!feedInput(_real_file.read(request_size, -1))) { - return ret_data; - } - - // if _z_input.size < _in_buffer.size() -> assume eof? - if (_z_input.size < request_size) { - eof = true; - //std::cout << "---- eof\n"; - } - } - - do { - ZSTD_outBuffer output = { _out_buffer.data(), _out_buffer.size(), 0 }; - size_t const ret = ZSTD_decompressStream(_dctx.get(), &output , &_z_input); - if (ZSTD_isError(ret)) { - // error <.< - std::cerr << "---- error: decompression error\n"; - return ret_data; - } - - // no new decomp data? - if (output.pos == 0) { - if (ret != 0) { - // if not error and not 0, indicates that - // there is additional flushing needed - continue; - } - - assert(eof || ret == 0); - break; - } - - int64_t returning_size = std::min(int64_t(size) - int64_t(ret_data.size()), output.pos); - assert(returning_size >= 0); - if (returning_size > 0) { - ret_data.insert( - ret_data.end(), - reinterpret_cast(output.dst), - reinterpret_cast(output.dst) + returning_size - ); - } - - // make sure we keep excess decompressed data - if (returning_size < int64_t(output.pos)) { - //const auto remaining_size = output.pos - returning_size; - _decompressed_buffer.insert( - _decompressed_buffer.cend(), - reinterpret_cast(output.dst) + returning_size, - reinterpret_cast(output.dst) + output.pos - ); - } - } while (_z_input.pos < _z_input.size); - } - - return ret_data; -} - -bool File2ZSTDR::feedInput(std::variant>&& read_buff) { - // TODO: optimize, we copy the buffer, but we might not need to - if (std::holds_alternative(read_buff)) { - const auto& span = std::get(read_buff); - if (span.size > _in_buffer.size()) { - // error, how did we read more than we asked for?? - return {}; - } - - if (span.empty()) { - _z_input = { _in_buffer.data(), 0, 0 }; - } else { - // cpy - _in_buffer = static_cast>(span); - _z_input = { - _in_buffer.data(), - span.size, - 0 - }; - } - } else if (std::holds_alternative>(read_buff)) { - auto& vec = std::get>(read_buff); - if (vec.size() > _in_buffer.size()) { - // error, how did we read more than we asked for?? - return {}; - } - - // cpy - _in_buffer = vec; - - _z_input = { - _in_buffer.data(), - _in_buffer.size(), - 0 - }; - } else { - // error, unsupported return value of read?? - return false; - } - - return true; -} - diff --git a/src/fragment_store/file2_zstd.hpp b/src/fragment_store/file2_zstd.hpp deleted file mode 100644 index 110b5a3..0000000 --- a/src/fragment_store/file2_zstd.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include - -#include - -#include - -// zstd compression wrapper over another file -// WARNING: only supports sequential writes -struct File2ZSTDW : public File2I { - File2I& _real_file; - - // TODO: hide this detail? - std::unique_ptr _cctx{ZSTD_createCCtx(), &ZSTD_freeCCtx}; - - File2ZSTDW(File2I& real); - virtual ~File2ZSTDW(void); - - bool isGood(void) override; - - // for simplicity and potential future seekability each write is its own frame - bool write(const ByteSpan data, int64_t pos = -1) override; - std::variant> read(uint64_t size, int64_t pos = -1) override; -}; - -// zstd decompression wrapper over another file -// WARNING: only supports sequential reads -// TODO: add seeking support (use frames) -struct File2ZSTDR : public File2I { - File2I& _real_file; - - // TODO: hide this detail? - std::unique_ptr _dctx{ZSTD_createDCtx(), &ZSTD_freeDCtx}; - std::vector _in_buffer; - std::vector _out_buffer; - std::vector _decompressed_buffer; // retains decompressed unread data between read() calls - ZSTD_inBuffer _z_input{nullptr, 0, 0}; - - File2ZSTDR(File2I& real); - virtual ~File2ZSTDR(void) {} - - bool isGood(void) override; - - bool write(const ByteSpan data, int64_t pos = -1) override; - std::variant> read(uint64_t size, int64_t pos = -1) override; - - private: - bool feedInput(std::variant>&& read_buff); -}; - diff --git a/src/fragment_store/message_fragment_store.cpp b/src/fragment_store/message_fragment_store.cpp index 85f47b4..90638a9 100644 --- a/src/fragment_store/message_fragment_store.cpp +++ b/src/fragment_store/message_fragment_store.cpp @@ -1,6 +1,6 @@ #include "./message_fragment_store.hpp" -#include "./serializer_json.hpp" +#include #include "../json/message_components.hpp" diff --git a/src/fragment_store/message_fragment_store.hpp b/src/fragment_store/message_fragment_store.hpp index ba2d7c0..6a26c4e 100644 --- a/src/fragment_store/message_fragment_store.hpp +++ b/src/fragment_store/message_fragment_store.hpp @@ -1,7 +1,7 @@ #pragma once -#include "./meta_components.hpp" -#include "./object_store.hpp" +#include +#include #include "./uuid_generator.hpp" diff --git a/src/fragment_store/messages_meta_components.hpp b/src/fragment_store/messages_meta_components.hpp index d2ea019..0324053 100644 --- a/src/fragment_store/messages_meta_components.hpp +++ b/src/fragment_store/messages_meta_components.hpp @@ -1,6 +1,6 @@ #pragma once -#include "./meta_components.hpp" +#include namespace ObjectStore::Components { struct MessagesVersion { diff --git a/src/fragment_store/meta_components.hpp b/src/fragment_store/meta_components.hpp deleted file mode 100644 index 0363caa..0000000 --- a/src/fragment_store/meta_components.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include "./types.hpp" -#include "./object_store.hpp" - -#include -#include -#include - -namespace ObjectStore::Components { - - // TODO: is this special and should this be saved to meta or not (its already in the file name on disk) - struct ID { - std::vector v; - }; - - struct DataEncryptionType { - Encryption enc {Encryption::NONE}; - }; - - struct DataCompressionType { - Compression comp {Compression::NONE}; - }; - - - // meta that is not written to (meta-)file - namespace Ephemeral { - - // TODO: move, backend specific - struct MetaFileType { - ::MetaFileType type {::MetaFileType::TEXT_JSON}; - }; - - struct MetaEncryptionType { - Encryption enc {Encryption::NONE}; - }; - - struct MetaCompressionType { - Compression comp {Compression::NONE}; - }; - - struct Backend { - // TODO: shared_ptr instead?? - StorageBackendI* ptr; - }; - - // excluded from file meta - // TODO: move to backend specific - struct FilePath { - // contains store path, if any - std::string path; - }; - - // TODO: seperate into remote and local? - // (remote meaning eg. the file on disk was changed by another program) - struct DirtyTag {}; - - } // Ephemeral - -} // Components - -// shortened to save bytes (until I find a way to save by ID in msgpack) -namespace ObjComp = ObjectStore::Components; - -// old names from frag era -namespace Fragment::Components { - //struct ID {}; - //struct DataEncryptionType {}; - //struct DataCompressionType {}; - struct ID : public ObjComp::ID {}; - struct DataEncryptionType : public ObjComp::DataEncryptionType {}; - struct DataCompressionType : public ObjComp::DataCompressionType {}; -} -namespace FragComp = Fragment::Components; - -#include "./meta_components_id.inl" - diff --git a/src/fragment_store/meta_components_id.inl b/src/fragment_store/meta_components_id.inl deleted file mode 100644 index d9f7de3..0000000 --- a/src/fragment_store/meta_components_id.inl +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include "./meta_components.hpp" - -#include - -// TODO: move more central -#define DEFINE_COMP_ID(x) \ -template<> \ -constexpr entt::id_type entt::type_hash::value() noexcept { \ - using namespace entt::literals; \ - return #x##_hs; \ -} \ -template<> \ -constexpr std::string_view entt::type_name::value() noexcept { \ - return #x; \ -} - -// cross compiler stable ids - -DEFINE_COMP_ID(ObjComp::DataEncryptionType) -DEFINE_COMP_ID(ObjComp::DataCompressionType) - -// old stuff -DEFINE_COMP_ID(FragComp::DataEncryptionType) -DEFINE_COMP_ID(FragComp::DataCompressionType) - -#undef DEFINE_COMP_ID - - diff --git a/src/fragment_store/object_store.cpp b/src/fragment_store/object_store.cpp deleted file mode 100644 index e459e94..0000000 --- a/src/fragment_store/object_store.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#include "./object_store.hpp" - -#include "./meta_components.hpp" - -#include "./serializer_json.hpp" - -#include // this sucks - -#include - -// TODO: move somewhere else -static bool serl_json_data_enc_type(const ObjectHandle oh, nlohmann::json& out) { - if (!oh.all_of()) { - return false; - } - - out = static_cast>( - oh.get().enc - ); - return true; -} - -static bool deserl_json_data_enc_type(ObjectHandle oh, const nlohmann::json& in) { - oh.emplace_or_replace( - static_cast( - static_cast>(in) - ) - ); - return true; -} - -static bool serl_json_data_comp_type(const ObjectHandle oh, nlohmann::json& out) { - if (!oh.all_of()) { - return false; - } - - out = static_cast>( - oh.get().comp - ); - return true; -} - -static bool deserl_json_data_comp_type(ObjectHandle oh, const nlohmann::json& in) { - oh.emplace_or_replace( - static_cast( - static_cast>(in) - ) - ); - return true; -} - -StorageBackendI::StorageBackendI(ObjectStore2& os) : _os(os) { -} - -ObjectHandle StorageBackendI::newObject(ByteSpan) { - //return {_os.registry(), entt::null}; - return {}; -} - -bool StorageBackendI::write(Object o, const ByteSpan data) { - std::function fn_cb = [read = 0ull, data](uint8_t* request_buffer, uint64_t buffer_size) mutable -> uint64_t { - uint64_t i = 0; - for (; i+read < data.size && i < buffer_size; i++) { - request_buffer[i] = data[i+read]; - } - read += i; - - return i; - }; - return write(o, fn_cb); -} - -ObjectStore2::ObjectStore2(void) { - // HACK: set them up independently - auto& sjc = _reg.ctx().emplace>(); - sjc.registerSerializer(serl_json_data_enc_type); - sjc.registerDeSerializer(deserl_json_data_enc_type); - sjc.registerSerializer(serl_json_data_comp_type); - sjc.registerDeSerializer(deserl_json_data_comp_type); - - // old stuff - sjc.registerSerializer(serl_json_data_enc_type); - sjc.registerDeSerializer(deserl_json_data_enc_type); - sjc.registerSerializer(serl_json_data_comp_type); - sjc.registerDeSerializer(deserl_json_data_comp_type); -} - -ObjectStore2::~ObjectStore2(void) { -} - -ObjectRegistry& ObjectStore2::registry(void) { - return _reg; -} - -ObjectHandle ObjectStore2::objectHandle(const Object o) { - return {_reg, o}; -} - -ObjectHandle ObjectStore2::getOneObjectByID(const ByteSpan id) { - // TODO: accelerate - // maybe keep it sorted and binary search? hash table lookup? - for (const auto& [obj, id_comp] : _reg.view().each()) { - if (id == ByteSpan{id_comp.v}) { - return {_reg, obj}; - } - } - - return {_reg, entt::null}; -} - -void ObjectStore2::throwEventConstruct(const Object o) { - std::cout << "OS debug: event construct " << entt::to_integral(o) << "\n"; - dispatch( - ObjectStore_Event::object_construct, - ObjectStore::Events::ObjectConstruct{ - ObjectHandle{_reg, o} - } - ); -} - -void ObjectStore2::throwEventUpdate(const Object o) { - std::cout << "OS debug: event update " << entt::to_integral(o) << "\n"; - dispatch( - ObjectStore_Event::object_update, - ObjectStore::Events::ObjectUpdate{ - ObjectHandle{_reg, o} - } - ); -} - -void ObjectStore2::throwEventDestroy(const Object o) { - std::cout << "OS debug: event destroy " << entt::to_integral(o) << "\n"; - dispatch( - ObjectStore_Event::object_destroy, - ObjectStore::Events::ObjectUpdate{ - ObjectHandle{_reg, o} - } - ); -} - diff --git a/src/fragment_store/object_store.hpp b/src/fragment_store/object_store.hpp deleted file mode 100644 index ebbb8f7..0000000 --- a/src/fragment_store/object_store.hpp +++ /dev/null @@ -1,95 +0,0 @@ -#pragma once - -#include -#include - -#include -#include - -#include - -// internal id -enum class Object : uint32_t {}; -using ObjectRegistry = entt::basic_registry; -using ObjectHandle = entt::basic_handle; - -// fwd -struct ObjectStore2; - -struct StorageBackendI { - // OR or OS ? - ObjectStore2& _os; - - StorageBackendI(ObjectStore2& os); - - // default impl fails, acting like a read only store - virtual ObjectHandle newObject(ByteSpan id); - - // ========== write object to storage ========== - using write_to_storage_fetch_data_cb = uint64_t(uint8_t* request_buffer, uint64_t buffer_size); - // calls data_cb with a buffer to be filled in, cb returns actual count of data. if returned < max, its the last buffer. - virtual bool write(Object o, std::function& data_cb) = 0; - bool write(Object o, const ByteSpan data); - - // ========== read object from storage ========== - using read_from_storage_put_data_cb = void(const ByteSpan buffer); - virtual bool read(Object o, std::function& data_cb) = 0; - -}; - -namespace ObjectStore::Events { - - struct ObjectConstruct { - const ObjectHandle e; - }; - struct ObjectUpdate { - const ObjectHandle e; - }; - struct ObjectDestory { - const ObjectHandle e; - }; - -} // ObjectStore::Events - -enum class ObjectStore_Event : uint16_t { - object_construct, - object_update, - object_destroy, - - MAX -}; - -struct ObjectStoreEventI { - using enumType = ObjectStore_Event; - - virtual ~ObjectStoreEventI(void) {} - - virtual bool onEvent(const ObjectStore::Events::ObjectConstruct&) { return false; } - virtual bool onEvent(const ObjectStore::Events::ObjectUpdate&) { return false; } - virtual bool onEvent(const ObjectStore::Events::ObjectDestory&) { return false; } -}; -using ObjectStoreEventProviderI = EventProviderI; - -struct ObjectStore2 : public ObjectStoreEventProviderI { - static constexpr const char* version {"2"}; - - ObjectRegistry _reg; - - // TODO: default backend? - - ObjectStore2(void); - virtual ~ObjectStore2(void); - - ObjectRegistry& registry(void); - ObjectHandle objectHandle(const Object o); - - // TODO: properly think about multiple objects witht he same id / different backends - ObjectHandle getOneObjectByID(const ByteSpan id); - - // sync? - - void throwEventConstruct(const Object o); - void throwEventUpdate(const Object o); - void throwEventDestroy(const Object o); -}; - diff --git a/src/fragment_store/serializer_json.hpp b/src/fragment_store/serializer_json.hpp deleted file mode 100644 index cd74540..0000000 --- a/src/fragment_store/serializer_json.hpp +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -// nlohmann -template -struct SerializerJsonCallbacks { - using Registry = entt::basic_registry; - using Handle = entt::basic_handle; - - using serialize_fn = bool(*)(const Handle h, nlohmann::json& out); - entt::dense_map _serl; - - using deserialize_fn = bool(*)(Handle h, const nlohmann::json& in); - entt::dense_map _deserl; - - template - static bool component_get_json(const Handle h, nlohmann::json& j) { - if (h.template all_of()) { - if constexpr (!std::is_empty_v) { - j = h.template get(); - } - return true; - } - - return false; - } - - template - static bool component_emplace_or_replace_json(Handle h, const nlohmann::json& j) { - if constexpr (std::is_empty_v) { - h.template emplace_or_replace(); // assert empty json? - } else { - h.template emplace_or_replace(static_cast(j)); - } - return true; - } - - void registerSerializer(serialize_fn fn, const entt::type_info& type_info) { - _serl[type_info.hash()] = fn; - } - - template - void registerSerializer( - serialize_fn fn = component_get_json, - const entt::type_info& type_info = entt::type_id() - ) { - registerSerializer(fn, type_info); - } - - void registerDeSerializer(deserialize_fn fn, const entt::type_info& type_info) { - _deserl[type_info.hash()] = fn; - } - - template - void registerDeSerializer( - deserialize_fn fn = component_emplace_or_replace_json, - const entt::type_info& type_info = entt::type_id() - ) { - registerDeSerializer(fn, type_info); - } -}; - diff --git a/src/fragment_store/test_file_zstd.cpp b/src/fragment_store/test_file_zstd.cpp deleted file mode 100644 index cecddd7..0000000 --- a/src/fragment_store/test_file_zstd.cpp +++ /dev/null @@ -1,394 +0,0 @@ -#include "./file2_zstd.hpp" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -const static std::string_view test_text1{"test1 1234 1234 :) 1234 5678 88888888\n"}; -const static ByteSpan data_test_text1{ - reinterpret_cast(test_text1.data()), - test_text1.size() -}; - -const static std::string_view test_text2{"test2 1234 1234 :) 1234 5678 88888888\n"}; -const static ByteSpan data_test_text2{ - reinterpret_cast(test_text2.data()), - test_text2.size() -}; - -const static std::string_view test_text3{ - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" - "00000000000000000000000000000000000000000000000000000 test 00000000000000000000000000000000000000\n" -}; -const static ByteSpan data_test_text3{ - reinterpret_cast(test_text3.data()), - test_text3.size() -}; - -int main(void) { - { // first do a simple mem backed test - std::vector buffer; - { // write - File2MemW f_w_mem{buffer}; - assert(f_w_mem.isGood()); - - File2ZSTDW f_w_zstd{f_w_mem}; - assert(f_w_zstd.isGood()); - - bool res = f_w_zstd.write(data_test_text1); - assert(res); - assert(f_w_zstd.isGood()); - - // write another frame of the same data - res = f_w_zstd.write(data_test_text2); - assert(res); - assert(f_w_zstd.isGood()); - - // write larger frame - res = f_w_zstd.write(data_test_text3); - assert(res); - assert(f_w_zstd.isGood()); - } - - std::cout << "in mem size: " << buffer.size() << "\n"; - - { // read - File2MemR f_r_mem{ByteSpan{buffer}}; - assert(f_r_mem.isGood()); - - File2ZSTDR f_r_zstd{f_r_mem}; - assert(f_r_zstd.isGood()); - - // reads return owning buffers - - { // readback data_test_text1 - auto r_res_var = f_r_zstd.read(data_test_text1.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text1.size); - assert(std::equal(data_test_text1.cbegin(), data_test_text1.cend(), r_res_vec.cbegin())); - } - - { // readback data_test_text2 - auto r_res_var = f_r_zstd.read(data_test_text2.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text2.size); - assert(std::equal( - data_test_text2.cbegin(), - data_test_text2.cend(), - r_res_vec.cbegin() - )); - } - - { // readback data_test_text3 - auto r_res_var = f_r_zstd.read(data_test_text3.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text3.size); - assert(std::equal( - data_test_text3.cbegin(), - data_test_text3.cend(), - r_res_vec.cbegin() - )); - } - - { // assert eof somehow - // since its eof, reading a single byte should return a zero sized buffer - auto r_res_var = f_r_zstd.read(1); - if (std::holds_alternative>(r_res_var)) { - assert(std::get>(r_res_var).empty()); - } else if (std::holds_alternative(r_res_var)) { - assert(std::get(r_res_var).empty()); - } else { - assert(false); - } - } - } - } - - const auto temp_dir = std::filesystem::temp_directory_path() / "file2_zstd_tests"; - - std::filesystem::create_directories(temp_dir); // making sure - assert(std::filesystem::exists(temp_dir)); - std::cout << "test temp dir: " << temp_dir << "\n"; - - const auto test1_file_path = temp_dir / "testfile1.zstd"; - { // simple write test - File2WFile f_w_file{std::string_view{test1_file_path.u8string()}, true}; - assert(f_w_file.isGood()); - - File2ZSTDW f_w_zstd{f_w_file}; - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - - //bool res = f_w_file.write(data_test_text1); - bool res = f_w_zstd.write(data_test_text1); - assert(res); - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - - // write another frame of the same data - res = f_w_zstd.write(data_test_text2); - assert(res); - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - - // write larger frame - res = f_w_zstd.write(data_test_text3); - assert(res); - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - } - - // after flush - assert(std::filesystem::file_size(test1_file_path) != 0); - - { // simple read test (using write test created file) - File2RFile f_r_file{std::string_view{test1_file_path.u8string()}}; - assert(f_r_file.isGood()); - - File2ZSTDR f_r_zstd{f_r_file}; - assert(f_r_zstd.isGood()); - assert(f_r_file.isGood()); - - // reads return owning buffers - - { // readback data_test_text1 - auto r_res_var = f_r_zstd.read(data_test_text1.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text1.size); - assert(std::equal(data_test_text1.cbegin(), data_test_text1.cend(), r_res_vec.cbegin())); - } - - { // readback data_test_text2 - auto r_res_var = f_r_zstd.read(data_test_text2.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text2.size); - assert(std::equal( - data_test_text2.cbegin(), - data_test_text2.cend(), - r_res_vec.cbegin() - )); - } - - { // readback data_test_text3 - auto r_res_var = f_r_zstd.read(data_test_text3.size); - - //assert(f_r_zstd.isGood()); - //assert(f_r_file.isGood()); - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - - //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - - assert(r_res_vec.size() == data_test_text3.size); - assert(std::equal( - data_test_text3.cbegin(), - data_test_text3.cend(), - r_res_vec.cbegin() - )); - } - - { // assert eof somehow - // since its eof, reading a single byte should return a zero sized buffer - auto r_res_var = f_r_zstd.read(1); - if (std::holds_alternative>(r_res_var)) { - assert(std::get>(r_res_var).empty()); - } else if (std::holds_alternative(r_res_var)) { - assert(std::get(r_res_var).empty()); - } else { - assert(false); - } - } - } - - const auto test2_file_path = temp_dir / "testfile2.zstd"; - { // write and read a single frame with increasing size - for (size_t fslog = 1; fslog <= 25; fslog++) { - const size_t frame_size = 1< tmp_data(frame_size); - for (auto& e : tmp_data) { - e = uint8_t(rng_data() & 0xff); // cutoff bad but good enough - } - assert(tmp_data.size() == frame_size); - - bool res = f_w_zstd.write(ByteSpan{tmp_data}); - assert(res); - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - } - - { // read - std::minstd_rand rng_data{11*1337}; - - File2RFile f_r_file{std::string_view{test2_file_path.u8string()}}; - assert(f_r_file.isGood()); - - File2ZSTDR f_r_zstd{f_r_file}; - assert(f_r_zstd.isGood()); - assert(f_r_file.isGood()); - - { // read frame - auto r_res_var = f_r_zstd.read(frame_size); - - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - assert(r_res_vec.size() == frame_size); - - // assert equal - for (auto& e : r_res_vec) { - assert(e == uint8_t(rng_data() & 0xff)); - } - } - - { // eof test - auto r_res_var = f_r_zstd.read(1); - if (std::holds_alternative>(r_res_var)) { - assert(std::get>(r_res_var).empty()); - } else if (std::holds_alternative(r_res_var)) { - assert(std::get(r_res_var).empty()); - } else { - assert(false); - } - } - } - - // since we spam file, we immediatly remove them - std::filesystem::remove(test2_file_path); - } - } - - const auto test3_file_path = temp_dir / "testfile3.zstd"; - { // large file test write - File2WFile f_w_file{std::string_view{test3_file_path.u8string()}, true}; - assert(f_w_file.isGood()); - - File2ZSTDW f_w_zstd{f_w_file}; - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - - std::minstd_rand rng{11*1337}; - std::minstd_rand rng_data{11*1337}; // make investigating easier - - size_t total_raw_size {0}; - for (size_t i = 0; i < 2000; i++) { - const size_t frame_size = (rng() % ((2<<19) - 1)) + 1; - - std::vector tmp_data(frame_size); - for (auto& e : tmp_data) { - e = uint8_t(rng_data() & 0xff); // cutoff bad but good enough - } - - bool res = f_w_zstd.write(ByteSpan{tmp_data}); - assert(res); - assert(f_w_zstd.isGood()); - assert(f_w_file.isGood()); - total_raw_size += frame_size; - } - std::cout << "t3 total raw size: " << total_raw_size << "\n"; - } - - // after flush - std::cout << "t3 size on disk: " << std::filesystem::file_size(test3_file_path) << "\n"; - - { // large file test read - File2RFile f_r_file{std::string_view{test3_file_path.u8string()}}; - assert(f_r_file.isGood()); - - File2ZSTDR f_r_zstd{f_r_file}; - assert(f_r_zstd.isGood()); - assert(f_r_file.isGood()); - - // using same rng state as write to compare - std::minstd_rand rng{11*1337}; - std::minstd_rand rng_data{11*1337}; - - for (size_t i = 0; i < 2000; i++) { - const size_t frame_size = (rng() % ((2<<19) - 1)) + 1; - //std::cerr << "f: " << i << " fs: " << frame_size << "\n"; - - auto r_res_var = f_r_zstd.read(frame_size); - - assert(std::holds_alternative>(r_res_var)); - const auto& r_res_vec = std::get>(r_res_var); - assert(r_res_vec.size() == frame_size); - - // assert equal - for (auto& e : r_res_vec) { - assert(e == uint8_t(rng_data() & 0xff)); - } - } - - { // eof test - auto r_res_var = f_r_zstd.read(1); - if (std::holds_alternative>(r_res_var)) { - assert(std::get>(r_res_var).empty()); - } else if (std::holds_alternative(r_res_var)) { - assert(std::get(r_res_var).empty()); - } else { - assert(false); - } - } - } - - // cleanup - std::filesystem::remove_all(temp_dir); -} - diff --git a/src/fragment_store/types.hpp b/src/fragment_store/types.hpp deleted file mode 100644 index 97cfb14..0000000 --- a/src/fragment_store/types.hpp +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include - -enum class Encryption : uint8_t { - NONE = 0x00, -}; -enum class Compression : uint8_t { - NONE = 0x00, - ZSTD = 0x01, - // TODO: zstd without magic - // TODO: zstd meta dict - // TODO: zstd data(message) dict -}; -enum class MetaFileType : uint8_t { - TEXT_JSON, - BINARY_MSGPACK, // msgpacked msgpack -}; - diff --git a/src/main_screen.hpp b/src/main_screen.hpp index abe77f3..5cbb608 100644 --- a/src/main_screen.hpp +++ b/src/main_screen.hpp @@ -2,8 +2,8 @@ #include "./screen.hpp" -#include "./fragment_store/object_store.hpp" -#include "./fragment_store/backends/filesystem_storage.hpp" +#include +#include #include #include #include