From a0ac08df866d66661bbe2e86e7f18db0db92cb82 Mon Sep 17 00:00:00 2001 From: Green Sky Date: Sat, 19 Aug 2023 21:39:12 +0200 Subject: [PATCH] detect and reuse existing data --- src/sha1_ngcft1.cpp | 53 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/sha1_ngcft1.cpp b/src/sha1_ngcft1.cpp index 85ffb35..c55579b 100644 --- a/src/sha1_ngcft1.cpp +++ b/src/sha1_ngcft1.cpp @@ -519,32 +519,61 @@ bool SHA1_NGCFT1::onEvent(const Message::Events::MessageUpdated& e) { ce.emplace(std::vector{full_file_path}); - // HACK truncate - auto file_impl = std::make_unique(full_file_path, info.file_size, true); + std::unique_ptr file_impl; + const bool file_exists = std::filesystem::exists(full_file_path); + + { + const bool truncate = !file_exists; + file_impl = std::make_unique(full_file_path, info.file_size, truncate); + } + if (!file_impl->isGood()) { std::cerr << "SHA1_NGCFT1 error: failed opening file '" << full_file_path << "'!\n"; //e.e.remove(); // stop return false; } - ce.emplace(std::move(file_impl)); - // next, create chuck cache and check for existing data - // TODO: check existing and dont truncate - { + { // next, create chuck cache and check for existing data auto& cc = ce.emplace(); cc.have_all = false; cc.have_count = 0; cc.chunk_hash_to_index.clear(); // if copy pasta - for (size_t i = 0; i < info.chunks.size(); i++) { - cc.have_chunk.push_back(false); - _chunks[info.chunks[i]] = ce; - cc.chunk_hash_to_index[info.chunks[i]].push_back(i); + + if (file_exists) { + // iterate existing file + for (size_t i = 0; i < info.chunks.size(); i++) { + auto existing_data = file_impl->read(i*info.chunk_size, info.chunkSize(i)); + // TODO: avoid copy + cc.have_chunk.push_back( + SHA1Digest{hash_sha1(existing_data.data(), existing_data.size())} == info.chunks.at(i) + ); + if (cc.have_chunk.back()) { + cc.have_count += 1; + } + + _chunks[info.chunks[i]] = ce; + cc.chunk_hash_to_index[info.chunks[i]].push_back(i); + } + + if (cc.have_count == info.chunks.size()) { + cc.have_all = true; + } + } else { + for (size_t i = 0; i < info.chunks.size(); i++) { + cc.have_chunk.push_back(false); + _chunks[info.chunks[i]] = ce; + cc.chunk_hash_to_index[info.chunks[i]].push_back(i); + } + } + + if (!cc.have_all) { + // now, enque + _queue_content_want_chunk.push_back(ce); } } - // now, enque - _queue_content_want_chunk.push_back(ce); + ce.emplace(std::move(file_impl)); ce.remove();