From b4eaf86ed1a476855974028339997c89fb94220e Mon Sep 17 00:00:00 2001 From: Green Sky Date: Mon, 28 Oct 2024 23:23:05 +0100 Subject: [PATCH] dynamically choose chunk size --- .../backends/sha1_mapped_filesystem.cpp | 9 +++++++ solanaceae/ngc_ft1_sha1/ft1_sha1_info.cpp | 24 +++++++++++++++++++ solanaceae/ngc_ft1_sha1/ft1_sha1_info.hpp | 2 ++ 3 files changed, 35 insertions(+) diff --git a/solanaceae/ngc_ft1_sha1/backends/sha1_mapped_filesystem.cpp b/solanaceae/ngc_ft1_sha1/backends/sha1_mapped_filesystem.cpp index 3daed84..a14a4df 100644 --- a/solanaceae/ngc_ft1_sha1/backends/sha1_mapped_filesystem.cpp +++ b/solanaceae/ngc_ft1_sha1/backends/sha1_mapped_filesystem.cpp @@ -86,6 +86,15 @@ void SHA1MappedFilesystem::newFromFile(std::string_view file_name, std::string_v // build info sha1_info.file_name = file_name_; sha1_info.file_size = file_impl->_file_size; // TODO: remove the reliance on implementation details + sha1_info.chunk_size = chunkSizeFromFileSize(sha1_info.file_size); + { + // TOOD: remove + const uint32_t cs_low {32*1024}; + const uint32_t cs_high {4*1024*1024}; + + assert(sha1_info.chunk_size >= cs_low); + assert(sha1_info.chunk_size <= cs_high); + } { // build chunks // HACK: load file fully diff --git a/solanaceae/ngc_ft1_sha1/ft1_sha1_info.cpp b/solanaceae/ngc_ft1_sha1/ft1_sha1_info.cpp index fd82ded..885ca82 100644 --- a/solanaceae/ngc_ft1_sha1/ft1_sha1_info.cpp +++ b/solanaceae/ngc_ft1_sha1/ft1_sha1_info.cpp @@ -1,5 +1,8 @@ #include "./ft1_sha1_info.hpp" +// next power of two +#include + #include SHA1Digest::SHA1Digest(const std::vector& v) { @@ -28,6 +31,27 @@ std::ostream& operator<<(std::ostream& out, const SHA1Digest& v) { return out; } +uint32_t chunkSizeFromFileSize(uint64_t file_size) { + const uint64_t fs_low {UINT64_C(512)*1024}; + const uint64_t fs_high {UINT64_C(2)*1024*1024*1024}; + + const uint32_t cs_low {32*1024}; + const uint32_t cs_high {4*1024*1024}; + + if (file_size <= fs_low) { // 512kib + return cs_low; // 32kib + } else if (file_size >= fs_high) { // 2gib + return cs_high; // 4mib + } + + double t = file_size - fs_low; + t /= fs_high; + + double x = (1 - t) * cs_low + t * cs_high; + + return entt::next_power_of_two(uint64_t(x)); +} + size_t FT1InfoSHA1::chunkSize(size_t chunk_index) const { if (chunk_index+1 == chunks.size()) { // last chunk diff --git a/solanaceae/ngc_ft1_sha1/ft1_sha1_info.hpp b/solanaceae/ngc_ft1_sha1/ft1_sha1_info.hpp index f09bd53..f3e850f 100644 --- a/solanaceae/ngc_ft1_sha1/ft1_sha1_info.hpp +++ b/solanaceae/ngc_ft1_sha1/ft1_sha1_info.hpp @@ -40,6 +40,8 @@ namespace std { // inject }; } // std +uint32_t chunkSizeFromFileSize(uint64_t file_size); + struct FT1InfoSHA1 { std::string file_name; uint64_t file_size {0};