forked from Green-Sky/tomato

change binary meta format and add zstd to metadata

parent 4fb2b51b7d
commit 6aac44cda9
@@ -40,25 +40,26 @@ A Metadata json object can have arbitrary keys, some are predefined:
 
 ### Split Metadata
 
-file magic bytes `SOLMET` (6 bytes)
-1 byte encryption type (`0x00` is none)
-1 byte compression type (`0x00` is none)
-...metadata here...
+msgpack array:
+- `[0]`: file magic string `SOLMET` (6 bytes)
+- `[1]`: uint8 encryption type (`0x00` is none)
+- `[2]`: uint8 compression type (`0x00` is none, `0x01` is zstd)
+- `[3]`: binary metadata (optionally compressed and encrypted)
 
 note that the encryption and compression are for the metadata only.
 The metadata itself contains encryption and compression info about the data.
 
 ### Split Data
 
-(none) all the data is in the metadata file.
+All the metadata is in the metadata file. (like encryption and compression)
 This is mostly to allow direct storage for files in the Fragment store without excessive duplication.
 Keep in mind to not use the actual file name as the data/meta file name.
 
 ### Single fragment
 
+Note: this format is unused for now
 
 file magic bytes `SOLFIL` (6 bytes)
 
 1 byte encryption type (`0x00` is none)
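For illustration only, here is a minimal, self-contained sketch of a writer for the Split Metadata layout described in the hunk above, using nlohmann::json and zstd. It is not the project's code: the `write_split_meta` helper and the local enum definitions are assumptions made for the example, and the only error handling shown is the fall-back-to-uncompressed behaviour this commit introduces.

```cpp
// Illustrative sketch, not the repository's implementation.
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

#include <nlohmann/json.hpp>
#include <zstd.h>

// local stand-ins mirroring the documented wire values
enum class Encryption : uint8_t { NONE = 0x00 };
enum class Compression : uint8_t { NONE = 0x00, ZSTD = 0x01 };

// hypothetical helper: serialize a metadata object into a SOLMET meta file
static bool write_split_meta(const std::string& path, const nlohmann::json& meta_j) {
	// [3] payload: the metadata object itself, serialized as msgpack
	const std::vector<uint8_t> meta = nlohmann::json::to_msgpack(meta_j);

	// try zstd; on failure fall back to storing the payload uncompressed
	Compression comp = Compression::ZSTD;
	std::vector<uint8_t> compressed(ZSTD_compressBound(meta.size()));
	const size_t c_size = ZSTD_compress(compressed.data(), compressed.size(), meta.data(), meta.size(), 0); // level 0 = library default
	if (ZSTD_isError(c_size)) {
		comp = Compression::NONE;
	} else {
		compressed.resize(c_size);
	}

	// outer msgpack array: [magic, encryption, compression, payload]
	nlohmann::json header = nlohmann::json::array();
	header.push_back("SOLMET");
	header.push_back(static_cast<uint8_t>(Encryption::NONE));
	header.push_back(static_cast<uint8_t>(comp));
	header.push_back(nlohmann::json::binary(comp == Compression::ZSTD ? compressed : meta));

	const std::vector<uint8_t> out = nlohmann::json::to_msgpack(header);

	std::ofstream file{path, std::ios::out | std::ios::binary};
	if (!file.is_open()) {
		return false;
	}
	file.write(reinterpret_cast<const char*>(out.data()), out.size());
	return file.good();
}
```

The whole meta file is thus a single msgpack value, which is what the scanning code further down parses back with `from_msgpack`.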
@@ -224,15 +224,6 @@ FragmentID FragmentStore::getFragmentCustomMatcher(
 	return entt::null;
 }
 
-template<typename F>
-static void writeBinaryMetafileHeader(F& file, const Encryption enc, const Compression comp) {
-	file.write("SOLMET", 6);
-	file.put(static_cast<std::underlying_type_t<Encryption>>(enc));
-
-	// TODO: is compressiontype encrypted?
-	file.put(static_cast<std::underlying_type_t<Compression>>(comp));
-}
-
 bool FragmentStore::syncToStorage(FragmentID fid, std::function<write_to_storage_fetch_data_cb>& data_cb) {
 	if (!_reg.valid(fid)) {
 		return false;
@@ -298,13 +289,8 @@ bool FragmentStore::syncToStorage(FragmentID fid, std::function<write_to_storage
 		return false;
 	}
 
-	// metadata type
-	if (meta_type == MetaFileType::BINARY_MSGPACK) { // binary metadata file
-		writeBinaryMetafileHeader(meta_file, meta_enc, meta_comp);
-	}
-
 	// sharing code between binary msgpack and text json for now
-	nlohmann::json meta_data = nlohmann::json::object(); // metadata needs to be an object, null not allowed
+	nlohmann::json meta_data_j = nlohmann::json::object(); // metadata needs to be an object, null not allowed
 	// metadata file
 
 	for (const auto& [type_id, storage] : _reg.storage()) {
@@ -325,33 +311,52 @@ bool FragmentStore::syncToStorage(FragmentID fid, std::function<write_to_storage
 		//if (meta_type == MetaFileType::BINARY_MSGPACK) { // msgpack uses the hash id instead
 			//s_cb_it->second(storage.value(fid), meta_data[storage.type().hash()]);
 		//} else if (meta_type == MetaFileType::TEXT_JSON) {
-			s_cb_it->second({_reg, fid}, meta_data[storage.type().name()]);
+			s_cb_it->second({_reg, fid}, meta_data_j[storage.type().name()]);
 		//}
 	}
 
 	if (meta_type == MetaFileType::BINARY_MSGPACK) { // binary metadata file
-		const auto res = nlohmann::json::to_msgpack(meta_data);
+		const std::vector<uint8_t> meta_data = nlohmann::json::to_msgpack(meta_data_j);
+		std::vector<uint8_t> meta_data_compressed; // empty if none
+		//std::vector<uint8_t> meta_data_encrypted; // empty if none
 
-		// TODO: refactor
-		if (meta_comp == Compression::NONE) {
-			meta_file.write(reinterpret_cast<const char*>(res.data()), res.size());
-		} else if (meta_comp == Compression::ZSTD) {
-			std::vector<uint8_t> compressed_buffer;
-			compressed_buffer.resize(ZSTD_compressBound(res.size()));
+		if (meta_comp == Compression::ZSTD) {
+			meta_data_compressed.resize(ZSTD_compressBound(meta_data.size()));
 
-			size_t const cSize = ZSTD_compress(compressed_buffer.data(), compressed_buffer.size(), res.data(), res.size(), 0); // 0 is default is probably 3
+			size_t const cSize = ZSTD_compress(meta_data_compressed.data(), meta_data_compressed.size(), meta_data.data(), meta_data.size(), 0); // 0 is default is probably 3
 			if (ZSTD_isError(cSize)) {
 				std::cerr << "FS error: compressing meta failed\n";
-				return false; // HACK
+				meta_data_compressed.clear();
+				meta_comp = Compression::NONE;
+			} else {
+				meta_data_compressed.resize(cSize);
 			}
+		} else if (meta_comp == Compression::NONE) {
+			// do nothing
+		} else {
+			assert(false && "implement me");
+		}
 
-			compressed_buffer.resize(cSize); // maybe skip this resize
+		// TODO: encryption
 
-			meta_file.write(reinterpret_cast<const char*>(compressed_buffer.data()), compressed_buffer.size());
-		}
+		// the meta file is itself msgpack data
+		nlohmann::json meta_header_j = nlohmann::json::array();
+		meta_header_j.emplace_back() = "SOLMET";
+		meta_header_j.push_back(meta_enc);
+		meta_header_j.push_back(meta_comp);
+
+		if (false) { // TODO: encryption
+		} else if (!meta_data_compressed.empty()) {
+			meta_header_j.push_back(nlohmann::json::binary(meta_data_compressed));
+		} else {
+			meta_header_j.push_back(nlohmann::json::binary(meta_data));
+		}
+
+		const auto meta_header_data = nlohmann::json::to_msgpack(meta_header_j);
+		meta_file.write(reinterpret_cast<const char*>(meta_header_data.data()), meta_header_data.size());
 	} else if (meta_type == MetaFileType::TEXT_JSON) {
 		// cant be compressed or encrypted
-		meta_file << meta_data.dump(2, ' ', true);
+		meta_file << meta_data_j.dump(2, ' ', true);
 	}
 
 	// now data
@@ -409,6 +414,8 @@ bool FragmentStore::syncToStorage(FragmentID fid, std::function<write_to_storage
 			}
 			// same as if lastChunk break;
 		} while (buffer_actual_size == buffer.size());
+	} else {
+		assert(false && "implement me");
 	}
 
 	meta_file.flush();
@@ -511,6 +518,8 @@ bool FragmentStore::loadFromStorage(FragmentID fid, std::function<read_from_stor
 		} while (buffer_actual_size == in_buffer.size() && !data_file.eof());
 
 		ZSTD_freeDCtx(dctx);
+	} else {
+		assert(false && "implement me");
 	}
 
 	return true;
@@ -660,47 +669,127 @@ size_t FragmentStore::scanStoragePath(std::string_view path) {
 	for (const auto& it : file_frag_list) {
 		nlohmann::json j;
 		if (it.meta_ext == ".meta.msgpack") {
-			// uh
-			// read binary header
-			assert(false);
-		} else if (it.meta_ext == ".meta.json") {
 			std::ifstream file(it.frag_path.generic_u8string() + it.meta_ext, std::ios::in | std::ios::binary);
 			if (!file.is_open()) {
 				std::cout << "FS error: failed opening meta " << it.frag_path << "\n";
 				continue;
 			}
 
-			file >> j;
+			// file is a msgpack within a msgpack
 
-			if (!j.is_object()) {
-				std::cerr << "FS error: json in meta is broken " << it.id_str << "\n";
+			std::vector<uint8_t> full_meta_data;
+			{ // read meta file
+				// figure out size
+				file.seekg(0, file.end);
+				uint64_t file_size = file.tellg();
+				file.seekg(0, file.beg);
+
+				full_meta_data.resize(file_size);
+
+				file.read(reinterpret_cast<char*>(full_meta_data.data()), full_meta_data.size());
+			}
+
+			const auto meta_header_j = nlohmann::json::from_msgpack(full_meta_data);
+
+			if (!meta_header_j.is_array() || meta_header_j.size() < 4) {
+				std::cerr << "FS error: broken binary meta " << it.frag_path << "\n";
 				continue;
 			}
 
-			// TODO: existing fragment file
-			//newFragmentFile();
-			FragmentHandle fh{_reg, _reg.create()};
-			fh.emplace<FragComp::ID>(hex2bin(it.id_str));
-
-			fh.emplace<FragComp::Ephemeral::FilePath>(it.frag_path.generic_u8string());
-
-			for (const auto& [k, v] : j.items()) {
-				// type id from string hash
-				const auto type_id = entt::hashed_string(k.data(), k.size());
-				const auto deserl_fn_it = _sc._deserl_json.find(type_id);
-				if (deserl_fn_it != _sc._deserl_json.cend()) {
-					// TODO: check return value
-					deserl_fn_it->second(fh, v);
-				} else {
-					std::cerr << "FS warning: missing deserializer for meta key '" << k << "'\n";
-				}
+			if (meta_header_j.at(0) != "SOLMET") {
+				std::cerr << "FS error: wrong magic '" << meta_header_j.at(0) << "' in meta " << it.frag_path << "\n";
+				continue;
 			}
-			// throw new frag event here
-			throwEventConstruct(fh);
-			count++;
+
+			Encryption meta_enc = meta_header_j.at(1);
+			if (meta_enc != Encryption::NONE) {
+				std::cerr << "FS error: unknown encryption " << it.frag_path << "\n";
+				continue;
+			}
+
+			Compression meta_comp = meta_header_j.at(2);
+			if (meta_comp != Compression::NONE && meta_comp != Compression::ZSTD) {
+				std::cerr << "FS error: unknown compression " << it.frag_path << "\n";
+				continue;
+			}
+
+			//const auto& meta_data_ref = meta_header_j.at(3).is_binary()?meta_header_j.at(3):meta_header_j.at(3).at("data");
+			if (!meta_header_j.at(3).is_binary()) {
+				std::cerr << "FS error: meta data not binary " << it.frag_path << "\n";
+				continue;
+			}
+			const nlohmann::json::binary_t& meta_data_ref = meta_header_j.at(3);
+
+			std::vector<uint8_t> meta_data_decomp;
+			if (meta_comp == Compression::NONE) {
+				// do nothing
+			} else if (meta_comp == Compression::ZSTD) {
+				meta_data_decomp.resize(ZSTD_DStreamOutSize());
+				ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+
+				ZSTD_inBuffer input {meta_data_ref.data(), meta_data_ref.size(), 0 };
+				ZSTD_outBuffer output = { meta_data_decomp.data(), meta_data_decomp.size(), 0 };
+				do {
+					size_t const ret = ZSTD_decompressStream(dctx, &output , &input);
+					if (ZSTD_isError(ret)) {
+						// error <.<
+						std::cerr << "FS error: decompression error\n";
+						meta_data_decomp.clear();
+						break;
+					}
+				} while (input.pos < input.size);
+				meta_data_decomp.resize(output.pos);
+
+				ZSTD_freeDCtx(dctx);
+			} else {
+				assert(false && "implement me");
+			}
+
+			// TODO: enc
+
+			if (!meta_data_decomp.empty()) {
+				j = nlohmann::json::from_msgpack(meta_data_decomp);
+			} else {
+				j = nlohmann::json::from_msgpack(meta_data_ref);
+			}
+		} else if (it.meta_ext == ".meta.json") {
+			std::ifstream file(it.frag_path.generic_u8string() + it.meta_ext, std::ios::in | std::ios::binary);
+			if (!file.is_open()) {
+				std::cerr << "FS error: failed opening meta " << it.frag_path << "\n";
+				continue;
+			}
+
+			file >> j;
 		} else {
 			assert(false);
 		}
+
+		if (!j.is_object()) {
+			std::cerr << "FS error: json in meta is broken " << it.id_str << "\n";
+			continue;
+		}
+
+		// TODO: existing fragment file
+		//newFragmentFile();
+		FragmentHandle fh{_reg, _reg.create()};
+		fh.emplace<FragComp::ID>(hex2bin(it.id_str));
+
+		fh.emplace<FragComp::Ephemeral::FilePath>(it.frag_path.generic_u8string());
+
+		for (const auto& [k, v] : j.items()) {
+			// type id from string hash
+			const auto type_id = entt::hashed_string(k.data(), k.size());
+			const auto deserl_fn_it = _sc._deserl_json.find(type_id);
+			if (deserl_fn_it != _sc._deserl_json.cend()) {
+				// TODO: check return value
+				deserl_fn_it->second(fh, v);
+			} else {
+				std::cerr << "FS warning: missing deserializer for meta key '" << k << "'\n";
+			}
		}
+		// throw new frag event here
+		throwEventConstruct(fh);
+		count++;
 	}
 
 	return count;
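And the reverse direction, again only an illustrative sketch and not the project's `scanStoragePath` code: it parses the outer msgpack array, checks the `SOLMET` magic and the encryption/compression bytes, and decompresses the payload in a single call via `ZSTD_getFrameContentSize`/`ZSTD_decompress` rather than the streaming API used above. The `read_split_meta` name is hypothetical.

```cpp
// Illustrative sketch, not the repository's implementation.
#include <cstdint>
#include <fstream>
#include <iterator>
#include <optional>
#include <string>
#include <vector>

#include <nlohmann/json.hpp>
#include <zstd.h>

// hypothetical helper: load a SOLMET meta file back into a json object
static std::optional<nlohmann::json> read_split_meta(const std::string& path) {
	std::ifstream file{path, std::ios::in | std::ios::binary};
	if (!file.is_open()) {
		return std::nullopt;
	}
	const std::vector<uint8_t> raw((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

	// the whole file is one msgpack array: [magic, encryption, compression, payload]
	const auto header = nlohmann::json::from_msgpack(raw); // throws on malformed input
	if (!header.is_array() || header.size() < 4 || header.at(0) != "SOLMET") {
		return std::nullopt;
	}
	if (header.at(1).get<uint8_t>() != 0x00) { // only Encryption::NONE handled here
		return std::nullopt;
	}
	if (!header.at(3).is_binary()) {
		return std::nullopt;
	}
	const auto& payload = header.at(3).get_binary();

	const uint8_t comp = header.at(2).get<uint8_t>();
	if (comp == 0x00) { // Compression::NONE
		return nlohmann::json::from_msgpack(payload);
	}
	if (comp == 0x01) { // Compression::ZSTD
		// single-shot decompression; assumes the frame records its content size
		const unsigned long long size = ZSTD_getFrameContentSize(payload.data(), payload.size());
		if (size == ZSTD_CONTENTSIZE_ERROR || size == ZSTD_CONTENTSIZE_UNKNOWN) {
			return std::nullopt;
		}
		std::vector<uint8_t> decomp(size);
		const size_t ret = ZSTD_decompress(decomp.data(), decomp.size(), payload.data(), payload.size());
		if (ZSTD_isError(ret)) {
			return std::nullopt;
		}
		decomp.resize(ret);
		return nlohmann::json::from_msgpack(decomp);
	}
	return std::nullopt; // unknown compression type
}
```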
BIN  src/fragment_store/fs_binary_msgpack1.png  (new file, binary not shown; 36 KiB)
BIN  src/fragment_store/fs_binary_msgpack2.png  (new file, binary not shown; 46 KiB)
@@ -145,7 +145,7 @@ void MessageFragmentStore::handleMessage(const Message3Handle& m) {
 
 	// if its still not found, we need a new fragment
 	if (fragment_uid.empty()) {
-		const auto new_fid = _fs.newFragmentFile("test_message_store/", MetaFileType::TEXT_JSON);
+		const auto new_fid = _fs.newFragmentFile("test_message_store/", MetaFileType::BINARY_MSGPACK);
 		auto fh = _fs.fragmentHandle(new_fid);
 		if (!static_cast<bool>(fh)) {
 			std::cout << "MFS error: failed to create new fragment for message\n";
@@ -154,6 +154,7 @@ void MessageFragmentStore::handleMessage(const Message3Handle& m) {
 
 		fragment_uid = fh.get<FragComp::ID>().v;
 
+		fh.emplace_or_replace<FragComp::Ephemeral::MetaCompressionType>().comp = Compression::ZSTD;
 		fh.emplace_or_replace<FragComp::DataCompressionType>().comp = Compression::ZSTD;
 
 		auto& new_ts_range = fh.emplace<FragComp::MessagesTSRange>();
@@ -8,10 +8,12 @@ enum class Encryption : uint8_t {
 enum class Compression : uint8_t {
 	NONE = 0x00,
 	ZSTD = 0x01,
+	// TODO: zstd without magic
+	// TODO: zstd meta dict
+	// TODO: zstd data(message) dict
 };
 
 enum class MetaFileType : uint8_t {
 	TEXT_JSON,
-	//BINARY_ARB,
-	BINARY_MSGPACK,
+	BINARY_MSGPACK, // msgpacked msgpack
 };
 