From 8b17ed195f2355f0115f65dac29c3b33ac8afa88 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Sat, 30 Mar 2024 13:50:31 +0100 Subject: [PATCH] more testing and file2 zstd now passes tests with varying frame sizes and 1.5gig files --- src/fragment_store/file2_zstd.cpp | 14 ++- src/fragment_store/test_file_zstd.cpp | 155 +++++++++++++++++++++++++- 2 files changed, 158 insertions(+), 11 deletions(-) diff --git a/src/fragment_store/file2_zstd.cpp b/src/fragment_store/file2_zstd.cpp index f43e2fde..b34b3083 100644 --- a/src/fragment_store/file2_zstd.cpp +++ b/src/fragment_store/file2_zstd.cpp @@ -52,7 +52,7 @@ bool File2ZSTDW::write(const ByteSpan data, int64_t pos) { } _real_file.write(ByteSpan{compressed_buffer.data(), output.pos}); - } while (input.pos < input.size && remaining_ret != 0 && _real_file.isGood()); + } while ((input.pos < input.size || remaining_ret != 0) && _real_file.isGood()); return _real_file.isGood(); } @@ -105,11 +105,11 @@ std::variant> File2ZSTDR::read(uint64_t size, int if (!feedInput(_real_file.read(request_size, -1))) { return ret_data; } - std::cout << "---- fed input " << _z_input.size << "bytes\n"; + // if _z_input.size < _in_buffer.size() -> assume eof? if (_z_input.size < request_size) { eof = true; - std::cout << "---- eof\n"; + //std::cout << "---- eof\n"; } } @@ -124,6 +124,12 @@ std::variant> File2ZSTDR::read(uint64_t size, int // no new decomp data? if (output.pos == 0) { + if (ret != 0) { + // if not error and not 0, indicates that + // there is additional flushing needed + continue; + } + assert(eof || ret == 0); break; } @@ -157,7 +163,6 @@ bool File2ZSTDR::feedInput(std::variant>&& read_b // TODO: optimize, we copy the buffer, but we might not need to if (std::holds_alternative(read_buff)) { const auto& span = std::get(read_buff); - std::cout << "---- feedInput got span " << span.size << "\n"; if (span.size > _in_buffer.size()) { // error, how did we read more than we asked for?? return {}; @@ -176,7 +181,6 @@ bool File2ZSTDR::feedInput(std::variant>&& read_b } } else if (std::holds_alternative>(read_buff)) { auto& vec = std::get>(read_buff); - std::cout << "---- feedInput got vec " << vec.size() << "\n"; if (vec.size() > _in_buffer.size()) { // error, how did we read more than we asked for?? return {}; diff --git a/src/fragment_store/test_file_zstd.cpp b/src/fragment_store/test_file_zstd.cpp index b5a13691..08509b78 100644 --- a/src/fragment_store/test_file_zstd.cpp +++ b/src/fragment_store/test_file_zstd.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include const static std::string_view test_text1{"test1 1234 1234 :) 1234 5678 88888888\n"}; @@ -35,7 +37,7 @@ const static ByteSpan data_test_text3{ }; int main(void) { - const auto temp_dir = std::filesystem::temp_directory_path() / "file2wzstdtests"; + const auto temp_dir = std::filesystem::temp_directory_path() / "file2_zstd_tests"; std::filesystem::create_directories(temp_dir); // making sure assert(std::filesystem::exists(temp_dir)); @@ -92,8 +94,8 @@ int main(void) { //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - assert(std::get>(r_res_var).size() == data_test_text1.size); - assert(std::equal(data_test_text1.cbegin(), data_test_text1.cend(), std::get>(r_res_var).cbegin())); + assert(r_res_vec.size() == data_test_text1.size); + assert(std::equal(data_test_text1.cbegin(), data_test_text1.cend(), r_res_vec.cbegin())); } { // readback data_test_text2 @@ -106,11 +108,11 @@ int main(void) { //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - assert(std::get>(r_res_var).size() == data_test_text2.size); + assert(r_res_vec.size() == data_test_text2.size); assert(std::equal( data_test_text2.cbegin(), data_test_text2.cend(), - std::get>(r_res_var).cbegin() + r_res_vec.cbegin() )); } @@ -124,7 +126,7 @@ int main(void) { //std::cout << "decomp: " << std::string_view{reinterpret_cast(r_res_vec.data()), r_res_vec.size()}; - assert(std::get>(r_res_var).size() == data_test_text3.size); + assert(r_res_vec.size() == data_test_text3.size); assert(std::equal( data_test_text3.cbegin(), data_test_text3.cend(), @@ -145,6 +147,147 @@ int main(void) { } } + const auto test2_file_path = temp_dir / "testfile2.zstd"; + { // write and read a single frame with increasing size + for (size_t fslog = 1; fslog <= 25; fslog++) { + const size_t frame_size = 1< tmp_data(frame_size); + for (auto& e : tmp_data) { + e = uint8_t(rng_data() & 0xff); // cutoff bad but good enough + } + assert(tmp_data.size() == frame_size); + + bool res = f_w_zstd.write(ByteSpan{tmp_data}); + assert(res); + assert(f_w_zstd.isGood()); + assert(f_w_file.isGood()); + } + + { // read + std::minstd_rand rng_data{11*1337}; + + File2RFile f_r_file{test2_file_path.c_str()}; + assert(f_r_file.isGood()); + + File2ZSTDR f_r_zstd{f_r_file}; + assert(f_r_zstd.isGood()); + assert(f_r_file.isGood()); + + { // read frame + auto r_res_var = f_r_zstd.read(frame_size); + + assert(std::holds_alternative>(r_res_var)); + const auto& r_res_vec = std::get>(r_res_var); + assert(r_res_vec.size() == frame_size); + + // assert equal + for (auto& e : r_res_vec) { + assert(e == uint8_t(rng_data() & 0xff)); + } + } + + { // eof test + auto r_res_var = f_r_zstd.read(1); + if (std::holds_alternative>(r_res_var)) { + assert(std::get>(r_res_var).empty()); + } else if (std::holds_alternative(r_res_var)) { + assert(std::get(r_res_var).empty()); + } else { + assert(false); + } + } + } + + // since we spam file, we immediatly remove them + std::filesystem::remove(test2_file_path); + } + } + + const auto test3_file_path = temp_dir / "testfile3.zstd"; + { // large file test write + File2WFile f_w_file{test3_file_path.c_str(), true}; + assert(f_w_file.isGood()); + + File2ZSTDW f_w_zstd{f_w_file}; + assert(f_w_zstd.isGood()); + assert(f_w_file.isGood()); + + std::minstd_rand rng{11*1337}; + std::minstd_rand rng_data{11*1337}; // make investigating easier + + size_t total_raw_size {0}; + for (size_t i = 0; i < 2000; i++) { + const size_t frame_size = (rng() % ((2<<19) - 1)) + 1; + + std::vector tmp_data(frame_size); + for (auto& e : tmp_data) { + e = uint8_t(rng_data() & 0xff); // cutoff bad but good enough + } + + bool res = f_w_zstd.write(ByteSpan{tmp_data}); + assert(res); + assert(f_w_zstd.isGood()); + assert(f_w_file.isGood()); + total_raw_size += frame_size; + } + std::cout << "t3 total raw size: " << total_raw_size << "\n"; + } + + // after flush + std::cout << "t3 size on disk: " << std::filesystem::file_size(test3_file_path) << "\n"; + + { // large file test read + File2RFile f_r_file{test3_file_path.c_str()}; + assert(f_r_file.isGood()); + + File2ZSTDR f_r_zstd{f_r_file}; + assert(f_r_zstd.isGood()); + assert(f_r_file.isGood()); + + // using same rng state as write to compare + std::minstd_rand rng{11*1337}; + std::minstd_rand rng_data{11*1337}; + + for (size_t i = 0; i < 2000; i++) { + const size_t frame_size = (rng() % ((2<<19) - 1)) + 1; + //std::cerr << "f: " << i << " fs: " << frame_size << "\n"; + + auto r_res_var = f_r_zstd.read(frame_size); + + assert(std::holds_alternative>(r_res_var)); + const auto& r_res_vec = std::get>(r_res_var); + assert(r_res_vec.size() == frame_size); + + // assert equal + for (auto& e : r_res_vec) { + assert(e == uint8_t(rng_data() & 0xff)); + } + } + + { // eof test + auto r_res_var = f_r_zstd.read(1); + if (std::holds_alternative>(r_res_var)) { + assert(std::get>(r_res_var).empty()); + } else if (std::holds_alternative(r_res_var)) { + assert(std::get(r_res_var).empty()); + } else { + assert(false); + } + } + } + // cleanup std::filesystem::remove_all(temp_dir); }