fix large files >4gig

commit f4796397ff
parent bc09b5aa60
Author: Green Sky
Date:   2023-08-22 17:20:56 +02:00

3 changed files with 26 additions and 13 deletions

View File

@@ -22,8 +22,6 @@ struct FileRWMapped : public FileI {
         std::error_code err;
         // sink, is also read
-        //_file_map = mio::make_mmap_sink(file_path, 0, file_size, err);
-        //_file_map = mio::make_mmap<mio::ummap_sink>(file_path, 0, file_size, err);
         _file_map.map(std::string{file_path}, 0, file_size, err);
         if (err) {
@@ -32,18 +30,19 @@ struct FileRWMapped : public FileI {
         }
     }

-    virtual ~FileRWMapped(void) {}
+    virtual ~FileRWMapped(void) override {}

     bool isGood(void) override {
         return _file_map.is_mapped();
     }

-    std::vector<uint8_t> read(uint64_t pos, uint32_t size) override {
+    std::vector<uint8_t> read(uint64_t pos, uint64_t size) override {
         if (pos+size > _file_size) {
+            //assert(false && "read past end");
             return {};
         }

-        return {_file_map.data()+pos, _file_map.data()+pos+size};
+        return {_file_map.data()+pos, _file_map.data()+(pos+size)};
     }

     bool write(uint64_t pos, const std::vector<uint8_t>& data) override {
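Widening read()'s size parameter from uint32_t to uint64_t keeps the bounds check and the span arithmetic in 64-bit end to end, so a single read addressed past the 4 GiB mark is no longer truncated. A minimal sketch of the same check, with the memory map replaced by a raw pointer; read_checked and its parameters are illustrative names, not from the codebase:

#include <cstdint>
#include <vector>

// Stand-in for FileRWMapped::read(): pos and size are both uint64_t, so
// pos+size is computed in 64 bits and offsets past 4 GiB compare correctly.
std::vector<uint8_t> read_checked(const uint8_t* base, uint64_t file_size,
                                  uint64_t pos, uint64_t size) {
    if (pos + size > file_size) {
        return {}; // out-of-range reads yield an empty buffer, as in the diff
    }
    return {base + pos, base + (pos + size)};
}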

View File

@@ -31,7 +31,7 @@ std::ostream& operator<<(std::ostream& out, const SHA1Digest& v) {
 size_t FT1InfoSHA1::chunkSize(size_t chunk_index) const {
     if (chunk_index+1 == chunks.size()) {
         // last chunk
-        return file_size - chunk_index * chunk_size;
+        return file_size - (uint64_t(chunk_index) * uint64_t(chunk_size));
     } else {
         return chunk_size;
     }
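This cast is the heart of the >4 GiB fix: when both operands of the multiply are 32-bit types (always the case on 32-bit targets, where size_t is 32 bits wide as well), the byte offset is computed in 32 bits and wraps modulo 2^32. Casting both operands to uint64_t first forces a 64-bit product. A small self-contained demonstration with made-up numbers:

#include <cstdint>
#include <cstdio>

int main(void) {
    // Hypothetical: chunk 40000 of a ~4.9 GiB file split into 128 KiB chunks.
    uint32_t chunk_index = 40000;
    uint32_t chunk_size  = 128 * 1024;

    uint32_t wrapped = chunk_index * chunk_size;                     // 32-bit product, wraps mod 2^32
    uint64_t exact   = uint64_t(chunk_index) * uint64_t(chunk_size); // cast first, then multiply

    std::printf("wrapped: %u\nexact:   %llu\n", wrapped, (unsigned long long)exact);
    // prints: wrapped: 947912704, exact: 5242880000
    return 0;
}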

View File

@@ -547,6 +547,7 @@ bool SHA1_NGCFT1::onEvent(const Message::Events::MessageUpdated& e) {
     { // next, create chunk cache and check for existing data
         auto& cc = ce.emplace<Components::FT1ChunkSHA1Cache>();
+        auto& bytes_received = ce.get_or_emplace<Message::Components::Transfer::BytesReceived>().total;
         cc.have_all = false;
         cc.have_count = 0;
@@ -555,21 +556,33 @@
         if (file_exists) {
             // iterate existing file
             for (size_t i = 0; i < info.chunks.size(); i++) {
-                auto existing_data = file_impl->read(i*info.chunk_size, info.chunkSize(i));
+                const uint64_t chunk_size = info.chunkSize(i);
+                auto existing_data = file_impl->read(i*uint64_t(info.chunk_size), chunk_size);
+                assert(existing_data.size() == chunk_size);
+
                 // TODO: avoid copy
-                cc.have_chunk.push_back(
-                    SHA1Digest{hash_sha1(existing_data.data(), existing_data.size())} == info.chunks.at(i)
-                );
-                if (cc.have_chunk.back()) {
+                const auto data_hash = SHA1Digest{hash_sha1(existing_data.data(), existing_data.size())};
+                const bool data_equal = data_hash == info.chunks.at(i);
+
+                cc.have_chunk.push_back(data_equal);
+                if (data_equal) {
                     cc.have_count += 1;
+                    bytes_received += chunk_size;
+                    //std::cout << "existing i[" << info.chunks.at(i) << "] == d[" << data_hash << "]\n";
+                } else {
+                    //std::cout << "unk i[" << info.chunks.at(i) << "] != d[" << data_hash << "]\n";
                 }

                 _chunks[info.chunks[i]] = ce;
                 cc.chunk_hash_to_index[info.chunks[i]].push_back(i);
             }
+            std::cout << "preexisting " << cc.have_count << "/" << info.chunks.size() << "\n";

             if (cc.have_count >= info.chunks.size()) {
                 cc.have_all = true;
+                //ce.remove<Message::Components::Transfer::BytesReceived>();
             }
         } else {
             for (size_t i = 0; i < info.chunks.size(); i++) {
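The resume path above re-hashes whatever is already on disk, so a restarted transfer only requests the chunks that are missing or corrupt, and it now also credits verified bytes to BytesReceived so reported progress survives a restart. A stripped-down sketch of that verification pass, assuming a sha1() helper in place of hash_sha1() and leaving out the entity/component plumbing; all names here are illustrative:

#include <array>
#include <cstdint>
#include <vector>

using Digest = std::array<uint8_t, 20>;
Digest sha1(const uint8_t* data, size_t size); // assumed stand-in for hash_sha1()

// Returns a have/have-not bitmap over the chunks and credits verified
// bytes to bytes_received, mirroring the logic in the diff above.
std::vector<bool> verify_existing_chunks(
        const std::vector<Digest>& expected,              // per-chunk hashes from the info
        const std::vector<std::vector<uint8_t>>& on_disk, // chunk payloads read from the file
        uint64_t& bytes_received) {
    std::vector<bool> have;
    have.reserve(expected.size());
    for (size_t i = 0; i < expected.size(); i++) {
        const auto& data = on_disk.at(i);
        const bool ok = sha1(data.data(), data.size()) == expected.at(i);
        have.push_back(ok);
        if (ok) {
            bytes_received += data.size(); // resumed data counts as received
        }
    }
    return have;
}
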
@@ -772,6 +785,7 @@ bool SHA1_NGCFT1::onEvent(const Events::NGCFT1_recv_data& e) {
     }

     auto& tv = peer_transfers[e.transfer_id].v;
+    peer_transfers[e.transfer_id].time_since_activity = 0.f;
     if (std::holds_alternative<ReceivingTransfer::Info>(tv)) {
         auto& info_data = std::get<ReceivingTransfer::Info>(tv).info_data;
         for (size_t i = 0; i < e.data_size && i + e.data_offset < info_data.size(); i++) {
@@ -826,7 +840,7 @@ bool SHA1_NGCFT1::onEvent(const Events::NGCFT1_send_data& e) {
         auto& chunk_transfer = std::get<SendingTransfer::Chunk>(transfer.v);
         const auto& info = chunk_transfer.content.get<Components::FT1InfoSHA1>();
         // TODO: should we really use file?
-        const auto data = chunk_transfer.content.get<Message::Components::Transfer::File>()->read((chunk_transfer.chunk_index * info.chunk_size) + e.data_offset, e.data_size);
+        const auto data = chunk_transfer.content.get<Message::Components::Transfer::File>()->read((chunk_transfer.chunk_index * uint64_t(info.chunk_size)) + e.data_offset, e.data_size);

         // TODO: optimize
         for (size_t i = 0; i < e.data_size && i < data.size(); i++) {
@@ -910,7 +924,7 @@ bool SHA1_NGCFT1::onEvent(const Events::NGCFT1_recv_done& e) {
             // HACK: only check first chunk (they *should* all be the same)
             const auto chunk_index = std::get<ReceivingTransfer::Chunk>(tv).chunk_indices.front();
-            const auto offset_into_file = chunk_index * info.chunk_size;
+            const uint64_t offset_into_file = chunk_index * uint64_t(info.chunk_size);

             assert(chunk_index < info.chunks.size());
             const auto chunk_size = info.chunkSize(chunk_index);