dynamically choose chunk size

This commit is contained in:
Green Sky 2024-10-28 23:23:05 +01:00
parent c7485c4577
commit b4eaf86ed1
No known key found for this signature in database
3 changed files with 35 additions and 0 deletions

View File

@ -86,6 +86,15 @@ void SHA1MappedFilesystem::newFromFile(std::string_view file_name, std::string_v
// build info
sha1_info.file_name = file_name_;
sha1_info.file_size = file_impl->_file_size; // TODO: remove the reliance on implementation details
sha1_info.chunk_size = chunkSizeFromFileSize(sha1_info.file_size);
{
// TODO: remove
const uint32_t cs_low {32*1024};
const uint32_t cs_high {4*1024*1024};
assert(sha1_info.chunk_size >= cs_low);
assert(sha1_info.chunk_size <= cs_high);
}
{ // build chunks
// HACK: load file fully

View File

@ -1,5 +1,8 @@
#include "./ft1_sha1_info.hpp"
// next power of two
#include <entt/core/memory.hpp>
#include <sodium.h>
SHA1Digest::SHA1Digest(const std::vector<uint8_t>& v) {
@ -28,6 +31,27 @@ std::ostream& operator<<(std::ostream& out, const SHA1Digest& v) {
return out;
}
// Picks the chunk size to use for a file of `file_size` bytes.
//
// Files of at most 512kib get 32kib chunks, files of 2gib or more get 4mib
// chunks. Sizes in between blend linearly between those two chunk sizes and
// the blended value is passed through entt::next_power_of_two.
//
// NOTE(review): the blend factor is divided by the upper file size bound, not
// by the width of the file size range, so it stays slightly below 1. Kept
// as-is, since changing it would change the chunk size picked for some files.
uint32_t chunkSizeFromFileSize(uint64_t file_size) {
	// file size bounds outside of which the chunk size is fixed
	constexpr uint64_t min_file_size {UINT64_C(512)*1024}; // 512kib
	constexpr uint64_t max_file_size {UINT64_C(2)*1024*1024*1024}; // 2gib

	// chunk sizes used at (and beyond) those bounds
	constexpr uint32_t min_chunk_size {32*1024}; // 32kib
	constexpr uint32_t max_chunk_size {4*1024*1024}; // 4mib

	if (file_size <= min_file_size) {
		return min_chunk_size;
	}

	if (file_size >= max_file_size) {
		return max_chunk_size;
	}

	// how far the file size is into the range, relative to the upper bound
	const double blend = static_cast<double>(file_size - min_file_size) / max_file_size;

	// linear blend between the two chunk sizes
	const double blended_size = (1 - blend) * min_chunk_size + blend * max_chunk_size;

	// truncate to an integer before handing it to next_power_of_two
	return entt::next_power_of_two(static_cast<uint64_t>(blended_size));
}
size_t FT1InfoSHA1::chunkSize(size_t chunk_index) const {
if (chunk_index+1 == chunks.size()) {
// last chunk

View File

@ -40,6 +40,8 @@ namespace std { // inject
};
} // std
// Chooses the chunk size for a file of file_size bytes:
// 32kib for files up to 512kib, 4mib for files of 2gib or more,
// and a value scaled with the file size in between.
uint32_t chunkSizeFromFileSize(uint64_t file_size);
struct FT1InfoSHA1 {
std::string file_name;
uint64_t file_size {0};