commit 1c189bfd9c34436956d439355e3ecab85ad22afa Author: Green Sky Date: Mon May 12 20:44:00 2025 +0200 Squashed 'external/libqoirdo/' content from commit 59f81203c9 git-subtree-dir: external/libqoirdo git-subtree-split: 59f81203c99b2bd6edda0c84b98ba66a38f0e2c4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..70406e4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.10) + +project(libqoirdo) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug) +endif() + +message( ${PROJECT_NAME} " build type: " ${CMAKE_BUILD_TYPE} ) + +add_library(qoirdo + ./qoirdo.hpp + ./qoirdo.cpp +) + +target_compile_features(qoirdo PUBLIC cxx_std_11) + +#if (NOT MSVC) +# target_link_libraries(rdopng m pthread) +#endif() + +######################################## + +add_executable(qoirdo_tool + tool.cpp +) + +target_link_libraries(qoirdo_tool qoirdo) + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..4679575 --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# rdopng +Rate-Distortion Optimized Lossy PNG, QOI, and LZ4 image (LZ4I) Encoding Tool + +rdopng is a command line tool which uses LZ match optimization, Lagrangian multiplier [rate distortion optimization (RDO)](https://en.wikipedia.org/wiki/Rate%E2%80%93distortion_optimization), a simple perceptual error tolerance model, and [Oklab](https://bottosson.github.io/posts/oklab/)-based colorspace error metrics to encode lossy 24/32bpp PNG/QOI/LZ4I files. The encoded lossy PNG files are typically 30-80% smaller relative to lodepng/libpng. The tool defaults to reasonably fast near-lossless settings which writes PNG's around 30-40% smaller than lossless PNG encoders. + +Unlike [pngquant](https://pngquant.org/), rdopng does not use 256-color palettes or dithering. PNG files encoded by rdopng typically range between roughly 2.5-7bpp, depending on the options used (and how much time and patience you have). + +Some example encodes and command lines are [here](https://github.com/richgel999/rdopng/wiki/Examples). + +You can download a pre-built Windows binary for an older version of rdopng [here](https://github.com/richgel999/rdopng/releases). (The latest version is in the repo.) 
You may need to install the [VS 2022 runtime redistributable from Microsoft](https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-170). + +### Building + +You'll need [cmake](https://cmake.org/). There are no other dependencies. + +Linux (gcc/clang): + +``` +cmake . +make +``` + +Windows (tested with Visual Studio 2022): + +``` +cmake . +rdopng.sln +``` + +### Instructions + +Encodes a .PNG/.BMP/.TGA/.JPG file to "./file_rdo.png": + +``` +rdopng file.png +``` + +Encodes a .PNG/.BMP/.TGA/.JPG file to "./file_rdo.qoi" (and also unpacks the coded image and saves it as .PNG): + +``` +rdopng -qoi -unpack_qoi_to_png file.png +``` + +Encodes a file to "./file_rdo.qoi" at higher quality per bit, but much slower (also try -better which is in between the default/uber settings): + +``` +rdopng -qoi -uber -unpack_qoi_to_png file.png +``` + +Encodes smaller PNG files but will be 2x slower: + +``` +rdopng -two_pass file.png +``` + +Encodes at lower than default quality (which is 300), but writes smaller files: + +``` +rdopng -lambda 500 file.png +``` + +Significantly lower PNG quality (which increases artifacts), using a higher than default parsing level to compensate for artifacts: + +``` +rdopng -level 3 -lambda 1000 file.png +``` + +Enable debug output and write output to z.png: + +``` +rdopng -debug file.png -output z.png +``` + +Load a normal map, normalize it, pack it using angular normal map metrics, and decode/encode texels using GPU SNORM unpacking (instead of the default UNORM): + +``` +rdopng -normalize -normal_map -snorm file.png +``` + +Level ranges from 0-29. Levels 0-9 use up to 4 pixel long matches, levels 10-17 use up to 6 pixel long matches, and 18-23 use up to 6 or 12 pixel long matches. Levels 24-29 use exhaustive matching and are beyond impractical except on tiny images. + +The higher the level within a match length category, the slower the encoder. Higher match length categories are needed for the higher lambdas/lower bitrates. 
At near-lossless settings (lower than approximately lambda 300), the smaller/less aggressive parsing levels are usually fine. At higher lambdas/lower bitrates the higher levels are needed to avoid artifacts. To get below roughly 3-4bpp you'll need to use high lambdas, two-pass mode, and very slow parsing levels. + +-lambda is the quality slider. Useful lambda values are roughly 1-20000, but values beyond approximately 500-1000 (depending on the image) will require fiddling with the level to compensate for artifacts. Higher levels are extremely slow because the current tool is single-threaded. + +Most options work with QOI, LZ4I, and PNG. The -level option is only for PNG, and the -uber/-better options are only for QOI/LZ4I. + +### RDO LZ4 examples + +``` +rdopng -lz4i -lambda 5000 -debug -better file.png +``` + +Unpacking .LZ4I images to PNG: + +``` +rdopng -unpack file.lz4i +``` + +LZ4I image files contain a simple header followed by the RGB(A) pixels compressed using LZ4. Here's the header (it's like QOI's but with a different sig): + +``` +#pragma pack(push, 1) +struct lz4i_header +{ + char sig[4]; // signature bytes "lz4i" + uint32_t width; // image width in pixels (BE) + uint32_t height; // image height in pixels (BE) + uint8_t channels; // 3 = RGB, 4 = RGBA + uint8_t colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear +}; +#pragma pack(pop) +``` + +### Known Problems +rdopng has only been tested on little-endian platforms, under Windows using MSVC and Ubuntu Linux using clang/gcc. There are a few known endian issues in there, which I'll eventually fix. It has not been compiled or tested on OSX. + +### Special Thanks +Thanks to [Paul Hughes](https://twitter.com/PaulieHughes) for encouraging me to continue working on this on Twitter. Also, thanks to [Jyrki Alakuijala](https://twitter.com/jyzg) for suggesting to drop YCbCr for an alternative such as Oklab. 
+ diff --git a/basisu.min.hpp b/basisu.min.hpp new file mode 100644 index 0000000..4cd53a3 --- /dev/null +++ b/basisu.min.hpp @@ -0,0 +1,867 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace basisu +{ + + using std::clamp; + using std::min; + using std::max; + + template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } + + + class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + //inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + //{ + // set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + //} + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + 
inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_luma(bool luma_601) const { return luma_601 ? 
get_601_luma() : get_709_luma(); } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(min(a[0], b[0]), min(a[1], b[1]), min(a[2], b[2]), min(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(max(a[0], b[0]), max(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3])); } + }; + + typedef std::vector color_rgba_vec; + + const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_black_trans_color(0, 0, 0, 0); + const color_rgba g_white_color(255, 255, 255, 255); + + // Simple 32-bit 2D image class + + class image + { + public: + image() : + m_width(0), m_height(0), m_pitch(0) + { + } + + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : + m_width(0), m_height(0), m_pitch(0) + { + init(pImage, width, height, comps); + } + + image(const image &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + image &swap(image &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + image &operator= (const image &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + image &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + m_pixels.erase(m_pixels.begin(), m_pixels.end()); + return *this; + } + + image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) + { + return crop(w, h, p, background); + } + + image &set_all(const color_rgba &c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) + { + 
assert(comps >= 1 && comps <= 4); + + resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const uint8_t *pSrc = &pImage[(x + y * width) * comps]; + color_rgba &dst = (*this)(x, y); + + if (comps == 1) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = 255; + } + else if (comps == 2) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = pSrc[1]; + } + else + { + dst.r = pSrc[0]; + dst.g = pSrc[1]; + dst.b = pSrc[2]; + if (comps == 4) + dst.a = pSrc[3]; + else + dst.a = 255; + } + } + } + } + + image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return *this; + } + + image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped_alpha(x + ix, y + iy, c); + return *this; + } + + image &crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(min(x, orig_w - 1U), min(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(min(x, orig_w - 1U), min(y, orig_h - 1U))); + } + } + return *this; + } + + //// pPixels MUST have been allocated using malloc() (basisu::vector will eventually use free() on the pointer). 
+ //image& grant_ownership(color_rgba* pPixels, uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) + //{ + // if (p == UINT32_MAX) + // p = w; + + // clear(); + + // if ((!p) || (!w) || (!h)) + // return *this; + + // m_pixels.grant_ownership(pPixels, p * h, p * h); + + // m_width = w; + // m_height = h; + // m_pitch = p; + + // return *this; + //} + + image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color, bool init_image = true) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + color_rgba_vec cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + if (init_image) + { + if (m_width || m_height) + { + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + } + else + { + //m_pixels.set_all(background); + set_all(background); + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const color_rgba& get_pixel(uint32_t c) const { return (*this)(c % m_width, c / m_width); } + inline color_rgba& get_pixel(uint32_t c) { return (*this)(c % m_width, c / m_width); } + + inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + //inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + 
//{ + // x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); + // y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); + // return m_pixels[x + y * m_pitch]; + //} + + //inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + //{ + // x = wrap_u ? posmod(x, m_width) : clamp<int>(x, 0, m_width - 1); + // y = wrap_v ? posmod(y, m_height) : clamp<int>(y, 0, m_height - 1); + // return m_pixels[x + y * m_pitch]; + //} + + inline image &set_clipped(int x, int y, const color_rgba &c) // stores c at (x, y) only when the pixel lies inside the image; negative coordinates become huge unsigned values and are rejected + { + if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + inline image& set_clipped_alpha(int x, int y, const color_rgba& c) // same clipping as set_clipped(), but only the alpha component of c is written + { + if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) + (*this)(x, y).m_comps[3] = c.m_comps[3]; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. Copies the src_w x src_h rectangle at (src_x, src_y) of src to (dst_x, dst_y) of this image; source texels outside src are skipped and destination writes are clipped by set_clipped(). + image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_width()) // columns are bounded by the source width, not its height + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const // writes w * h pixels to pDst row by row; coordinates past the image edge are clamped via get_clamped() + { + if (((src_x + w) > m_width) || ((src_y + h) > m_height)) + { + // Slower clamping case + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + } + else + { + const color_rgba* pSrc = &m_pixels[src_x + src_y * m_pitch]; + + for (uint32_t y = 0; y < h; y++) + { + std::memcpy(pDst, pSrc, w * sizeof(color_rgba)); + pSrc += m_pitch; + pDst += w; + } + } + + return *this; + } + + image &set_block_clipped(const color_rgba *pSrc, uint32_t 
dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const color_rgba_vec &get_pixels() const { return m_pixels; } + inline color_rgba_vec &get_pixels() { return m_pixels; } + + inline const color_rgba *get_ptr() const { return &m_pixels[0]; } + inline color_rgba *get_ptr() { return &m_pixels[0]; } + + bool has_alpha() const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + if ((*this)(x, y).a < 255) + return true; + + return false; + } + + image &set_alpha(uint8_t a) + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + (*this)(x, y).a = a; + return *this; + } + + image &flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + return *this; + } + + //// TODO: There are many ways to do this, not sure this is the best way. 
+ //image &renormalize_normal_map() + //{ + // for (uint32_t y = 0; y < m_height; y++) + // { + // for (uint32_t x = 0; x < m_width; x++) + // { + // color_rgba &c = (*this)(x, y); + // if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + // continue; + + // vec3F v(c.r, c.g, c.b); + // v = (v * (2.0f / 255.0f)) - vec3F(1.0f); + // v.clamp(-1.0f, 1.0f); + + // float length = v.length(); + // const float cValidThresh = .077f; + // if (length < cValidThresh) + // { + // c.set(128, 128, 128, c.a); + // } + // else if (fabs(length - 1.0f) > cValidThresh) + // { + // if (length) + // v /= length; + + // for (uint32_t i = 0; i < 3; i++) + // c[i] = static_cast(clamp(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); + + // if ((c.g == 128) && (c.r == 128)) + // { + // if (c.b < 128) + // c.b = 0; + // else + // c.b = 255; + // } + // } + // } + // } + // return *this; + //} + + bool operator== (const image& img) const + { + if ((m_width != img.get_width()) || (m_height != img.get_height())) + return false; + + for (uint32_t y = 0; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + if ((*this)(x, y) != img(x, y)) + return false; + + return true; + } + + bool operator!= (const image& img) const + { + return !(*this == img); + } + + void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + color_rgba_vec m_pixels; + }; + + enum eZero { cZero }; + + // Linear algebra + + template + class vec + { + protected: + T m_v[N]; + + public: + enum { num_elements = N }; + + inline vec() { } + inline vec(eZero) { set_zero(); } + + explicit inline vec(T val) { set(val); } + inline vec(T v0, T v1) { set(v0, v1); } + inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } + inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } + inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) 
m_v[i] = other.m_v[i]; } + template inline vec(const vec &other) { set(other); } + + inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; } + inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } + + inline T getX() const { return m_v[0]; } + inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; } + inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; } + inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; } + + inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } + inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } + + inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + + template + inline vec &set(const vec &other) + { + uint32_t i; + if ((const void *)(&other) == (const void *)(this)) + return *this; + const uint32_t m = min(OtherN, N); + for (i = 0; i < m; i++) + m_v[i] = static_cast(other[i]); + for (; i < N; i++) + m_v[i] = 0; + return *this; + } + + inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } + inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } + inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } + + inline vec &set(T v0, T v1) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + clear_elements(2, N); + } + return *this; + } + + inline vec &set(T v0, T v1, T v2) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + clear_elements(3, N); + } + } + return *this; + } + + inline vec &set(T v0, T v1, T v2, T v3) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + + if (N >= 4) + { + m_v[3] = v3; + 
clear_elements(4, N); // zero everything after component 3; starting at 5 left m_v[4] unset when N > 4
+					}
+				}
+			}
+			return *this;
+		}
+
+		inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
+		// Converting assignment from a vec of another size/element type; delegates to set().
+		template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
+
+		// Pointer to the first of the N contiguous components.
+		inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
+		inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
+
+		inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
+		inline vec operator+ () const { return *this; }
+		// Compound operators taking a vec work component-wise.
+		inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
+		inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
+		inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
+		inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
+		// Compound operators taking a scalar apply it to every component.
+		inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
+		inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
+
+		friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
+		friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
+		friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
+		friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
+		friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
+		friend inline vec
operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
+
+		static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
+
+		inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
+
+		// Note: norm() is the dot product with itself, i.e. the squared length; length() takes the square root.
+		inline T norm() const { return dot_product(*this, *this); }
+		inline T length() const { return sqrt(norm()); }
+
+		inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
+		// Same as squared_distance(), but accumulated in double regardless of T.
+		inline double squared_distance_d(const vec& other) const { double d2 = 0; for (uint32_t i = 0; i < N; i++) { double d = (double)m_v[i] - (double)other.m_v[i]; d2 += d * d; } return d2; }
+
+		inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
+		inline double distance_d(const vec& other) const { return sqrt(squared_distance_d(other)); }
+
+		// Scales to unit length; a zero-length vector is left unchanged.
+		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
+
+		// Clamps every component to [l, h]. The call is qualified because the member itself is named clamp.
+		inline vec &clamp(T l, T h)
+		{
+			for (uint32_t i = 0; i < N; i++)
+				m_v[i] = basisu::clamp(m_v[i], l, h);
+			return *this;
+		}
+
+		// Component-wise minimum of a and b.
+		static vec component_min(const vec& a, const vec& b)
+		{
+			vec res;
+			for (uint32_t i = 0; i < N; i++)
+				res[i] = min(a[i], b[i]);
+			return res;
+		}
+
+		// Component-wise maximum of a and b.
+		static vec component_max(const vec& a, const vec& b)
+		{
+			vec res;
+			for (uint32_t i = 0; i < N; i++)
+				res[i] = max(a[i], b[i]);
+			return res;
+		}
+	};
+
+	typedef vec<4, double> vec4D;
+	typedef vec<3, double> vec3D;
+	typedef vec<2, double> vec2D;
+	typedef vec<1, double> vec1D;
+
+	typedef vec<4, float> vec4F;
+	typedef vec<3, float> vec3F;
+	typedef vec<2, float> vec2F;
+	typedef vec<1, float> vec1F;
+
+	typedef vec<16, float> vec16F;
+
+	// 2D array
+
+	// Row-major width x height grid of T stored in a single std::vector.
+	template <typename T>
+	class vector2D
+	{
+		typedef std::vector<T> TVec;
+
+		uint32_t m_width,
m_height;
+		TVec m_values;
+
+	public:
+		vector2D() :
+			m_width(0),
+			m_height(0)
+		{
+		}
+
+		vector2D(uint32_t w, uint32_t h) :
+			m_width(0),
+			m_height(0)
+		{
+			resize(w, h);
+		}
+
+		vector2D(const vector2D &other)
+		{
+			*this = other;
+		}
+
+		vector2D &operator= (const vector2D &other)
+		{
+			if (this != &other)
+			{
+				m_width = other.m_width;
+				m_height = other.m_height;
+				m_values = other.m_values;
+			}
+			return *this;
+		}
+
+		inline bool operator== (const vector2D &rhs) const
+		{
+			return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
+		}
+
+		inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }
+
+		// Element access by (x, y); storage is row-major, index = x + y * m_width.
+		inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
+		inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
+
+		// Element access by flat index (not range-checked).
+		inline const T &operator[] (uint32_t i) const { return m_values[i]; }
+		inline T &operator[] (uint32_t i) { return m_values[i]; }
+
+		// Access with coordinates clamped into the grid. The upper bound is the last valid
+		// index (dimension - 1); clamping to the dimension itself allowed an out-of-range access.
+		inline const T &at_clamped(int x, int y) const { return (*this)(clamp(x, 0, static_cast<int>(m_width) - 1), clamp(y, 0, static_cast<int>(m_height) - 1)); }
+		inline T &at_clamped(int x, int y) { return (*this)(clamp(x, 0, static_cast<int>(m_width) - 1), clamp(y, 0, static_cast<int>(m_height) - 1)); }
+
+		void clear()
+		{
+			m_width = 0;
+			m_height = 0;
+			m_values.clear();
+		}
+
+		void set_all(const T&val)
+		{
+			//vector_set_all(m_values, val);
+			for (size_t i = 0; i < m_values.size(); i++)
+				m_values[i] = val;
+		}
+
+		// data() instead of &m_values[0]: indexing element 0 of an empty vector is undefined.
+		inline const T* get_ptr() const { return m_values.data(); }
+		inline T* get_ptr() { return m_values.data(); }
+
+		// Resizes the grid, keeping the overlapping top-left region of the old contents.
+		vector2D &resize(uint32_t new_width, uint32_t new_height)
+		{
+			if ((m_width == new_width) && (m_height == new_height))
+				return *this;
+
+			// After the swap, oldVals holds the previous contents and m_values the new storage.
+			TVec oldVals(new_width * new_height);
+			oldVals.swap(m_values);
+
+			const uint32_t w = min(m_width, new_width);
+			const uint32_t h = min(m_height, new_height);
+
+			if ((w) && (h))
+			{
+				for (uint32_t y = 0; y < h; y++)
+					for
(uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return *this; + } + }; + +} // basisu + diff --git a/examples/aliens.png b/examples/aliens.png new file mode 100644 index 0000000..f8c2e40 Binary files /dev/null and b/examples/aliens.png differ diff --git a/examples/aliens_2_rdo.png b/examples/aliens_2_rdo.png new file mode 100644 index 0000000..83d2203 Binary files /dev/null and b/examples/aliens_2_rdo.png differ diff --git a/examples/aliens_rdo.png b/examples/aliens_rdo.png new file mode 100644 index 0000000..76bb4fc Binary files /dev/null and b/examples/aliens_rdo.png differ diff --git a/examples/crossyf.png b/examples/crossyf.png new file mode 100644 index 0000000..abd6478 Binary files /dev/null and b/examples/crossyf.png differ diff --git a/examples/crossyf_2_rdo.png b/examples/crossyf_2_rdo.png new file mode 100644 index 0000000..f537218 Binary files /dev/null and b/examples/crossyf_2_rdo.png differ diff --git a/examples/crossyf_rdo.png b/examples/crossyf_rdo.png new file mode 100644 index 0000000..ed0baa4 Binary files /dev/null and b/examples/crossyf_rdo.png differ diff --git a/examples/doom.png b/examples/doom.png new file mode 100644 index 0000000..965959d Binary files /dev/null and b/examples/doom.png differ diff --git a/examples/doom_delta.png b/examples/doom_delta.png new file mode 100644 index 0000000..99ae008 Binary files /dev/null and b/examples/doom_delta.png differ diff --git a/examples/doom_rdo.png b/examples/doom_rdo.png new file mode 100644 index 0000000..593f25b Binary files /dev/null and b/examples/doom_rdo.png differ diff --git a/examples/gotham.png b/examples/gotham.png new file mode 100644 index 0000000..cee9849 Binary files /dev/null and b/examples/gotham.png differ diff --git a/examples/gotham_2_delta.png b/examples/gotham_2_delta.png new file mode 100644 index 0000000..03c1ae0 Binary files /dev/null and b/examples/gotham_2_delta.png differ diff 
--git a/examples/gotham_2_rdo.png b/examples/gotham_2_rdo.png new file mode 100644 index 0000000..43b3b89 Binary files /dev/null and b/examples/gotham_2_rdo.png differ diff --git a/examples/gotham_delta.png b/examples/gotham_delta.png new file mode 100644 index 0000000..01f9a23 Binary files /dev/null and b/examples/gotham_delta.png differ diff --git a/examples/gotham_rdo.png b/examples/gotham_rdo.png new file mode 100644 index 0000000..1a8e5c4 Binary files /dev/null and b/examples/gotham_rdo.png differ diff --git a/examples/high_fidelity.png b/examples/high_fidelity.png new file mode 100644 index 0000000..fed5254 Binary files /dev/null and b/examples/high_fidelity.png differ diff --git a/examples/high_fidelity_1.png b/examples/high_fidelity_1.png new file mode 100644 index 0000000..606f4e8 Binary files /dev/null and b/examples/high_fidelity_1.png differ diff --git a/examples/high_fidelity_2.png b/examples/high_fidelity_2.png new file mode 100644 index 0000000..1b8d34b Binary files /dev/null and b/examples/high_fidelity_2.png differ diff --git a/examples/joker_768.png b/examples/joker_768.png new file mode 100644 index 0000000..a72008a Binary files /dev/null and b/examples/joker_768.png differ diff --git a/examples/joker_768_2_delta.png b/examples/joker_768_2_delta.png new file mode 100644 index 0000000..0f2a7b5 Binary files /dev/null and b/examples/joker_768_2_delta.png differ diff --git a/examples/joker_768_2_rdo.png b/examples/joker_768_2_rdo.png new file mode 100644 index 0000000..fdfe440 Binary files /dev/null and b/examples/joker_768_2_rdo.png differ diff --git a/examples/joker_768_3_delta.png b/examples/joker_768_3_delta.png new file mode 100644 index 0000000..a2faa98 Binary files /dev/null and b/examples/joker_768_3_delta.png differ diff --git a/examples/joker_768_3_rdo.png b/examples/joker_768_3_rdo.png new file mode 100644 index 0000000..1184162 Binary files /dev/null and b/examples/joker_768_3_rdo.png differ diff --git a/examples/joker_768_4_delta.png 
b/examples/joker_768_4_delta.png new file mode 100644 index 0000000..dcfec23 Binary files /dev/null and b/examples/joker_768_4_delta.png differ diff --git a/examples/joker_768_4_rdo.png b/examples/joker_768_4_rdo.png new file mode 100644 index 0000000..153b8ca Binary files /dev/null and b/examples/joker_768_4_rdo.png differ diff --git a/examples/joker_768_delta.png b/examples/joker_768_delta.png new file mode 100644 index 0000000..e26c9d4 Binary files /dev/null and b/examples/joker_768_delta.png differ diff --git a/examples/joker_768_rdo.png b/examples/joker_768_rdo.png new file mode 100644 index 0000000..9bf7df2 Binary files /dev/null and b/examples/joker_768_rdo.png differ diff --git a/examples/kodim18.png b/examples/kodim18.png new file mode 100644 index 0000000..8572808 Binary files /dev/null and b/examples/kodim18.png differ diff --git a/examples/kodim18_delta.png b/examples/kodim18_delta.png new file mode 100644 index 0000000..8161fa8 Binary files /dev/null and b/examples/kodim18_delta.png differ diff --git a/examples/kodim18_rdo.png b/examples/kodim18_rdo.png new file mode 100644 index 0000000..e9c9fcc Binary files /dev/null and b/examples/kodim18_rdo.png differ diff --git a/examples/lara_1024.png b/examples/lara_1024.png new file mode 100644 index 0000000..bd34366 Binary files /dev/null and b/examples/lara_1024.png differ diff --git a/examples/lara_1024_delta.png b/examples/lara_1024_delta.png new file mode 100644 index 0000000..5c6ff0d Binary files /dev/null and b/examples/lara_1024_delta.png differ diff --git a/examples/lara_1024_rdo.png b/examples/lara_1024_rdo.png new file mode 100644 index 0000000..8c7ed2d Binary files /dev/null and b/examples/lara_1024_rdo.png differ diff --git a/examples/magneto.png b/examples/magneto.png new file mode 100644 index 0000000..19b6110 Binary files /dev/null and b/examples/magneto.png differ diff --git a/examples/magneto_2_alpha_delta.png b/examples/magneto_2_alpha_delta.png new file mode 100644 index 0000000..7651a65 
Binary files /dev/null and b/examples/magneto_2_alpha_delta.png differ diff --git a/examples/magneto_2_delta.png b/examples/magneto_2_delta.png new file mode 100644 index 0000000..2e1f247 Binary files /dev/null and b/examples/magneto_2_delta.png differ diff --git a/examples/magneto_2_rdo.png b/examples/magneto_2_rdo.png new file mode 100644 index 0000000..1cd902f Binary files /dev/null and b/examples/magneto_2_rdo.png differ diff --git a/examples/magneto_delta.png b/examples/magneto_delta.png new file mode 100644 index 0000000..e2adb35 Binary files /dev/null and b/examples/magneto_delta.png differ diff --git a/examples/magneto_rdo.png b/examples/magneto_rdo.png new file mode 100644 index 0000000..8995e62 Binary files /dev/null and b/examples/magneto_rdo.png differ diff --git a/examples/masterchief.png b/examples/masterchief.png new file mode 100644 index 0000000..73f46fd Binary files /dev/null and b/examples/masterchief.png differ diff --git a/examples/masterchief_2_rdo.png b/examples/masterchief_2_rdo.png new file mode 100644 index 0000000..77d41e0 Binary files /dev/null and b/examples/masterchief_2_rdo.png differ diff --git a/examples/masterchief_rdo.png b/examples/masterchief_rdo.png new file mode 100644 index 0000000..1eb4bc1 Binary files /dev/null and b/examples/masterchief_rdo.png differ diff --git a/examples/minerology.png b/examples/minerology.png new file mode 100644 index 0000000..590be40 Binary files /dev/null and b/examples/minerology.png differ diff --git a/examples/minerology_delta.png b/examples/minerology_delta.png new file mode 100644 index 0000000..e5485c8 Binary files /dev/null and b/examples/minerology_delta.png differ diff --git a/examples/minerology_rdo.png b/examples/minerology_rdo.png new file mode 100644 index 0000000..3bbed04 Binary files /dev/null and b/examples/minerology_rdo.png differ diff --git a/examples/puppy.png b/examples/puppy.png new file mode 100644 index 0000000..4200121 Binary files /dev/null and b/examples/puppy.png differ 
diff --git a/examples/puppy_delta.png b/examples/puppy_delta.png new file mode 100644 index 0000000..78d2762 Binary files /dev/null and b/examples/puppy_delta.png differ diff --git a/examples/puppy_rdo.png b/examples/puppy_rdo.png new file mode 100644 index 0000000..3ff6588 Binary files /dev/null and b/examples/puppy_rdo.png differ diff --git a/examples/stp.png b/examples/stp.png new file mode 100644 index 0000000..5e5810a Binary files /dev/null and b/examples/stp.png differ diff --git a/examples/stp_2_delta.png b/examples/stp_2_delta.png new file mode 100644 index 0000000..075e23a Binary files /dev/null and b/examples/stp_2_delta.png differ diff --git a/examples/stp_2_rdo.png b/examples/stp_2_rdo.png new file mode 100644 index 0000000..a08c29b Binary files /dev/null and b/examples/stp_2_rdo.png differ diff --git a/examples/stp_3_delta.png b/examples/stp_3_delta.png new file mode 100644 index 0000000..880e45a Binary files /dev/null and b/examples/stp_3_delta.png differ diff --git a/examples/stp_3_rdo.png b/examples/stp_3_rdo.png new file mode 100644 index 0000000..1f75162 Binary files /dev/null and b/examples/stp_3_rdo.png differ diff --git a/examples/stp_delta.png b/examples/stp_delta.png new file mode 100644 index 0000000..a7a53c1 Binary files /dev/null and b/examples/stp_delta.png differ diff --git a/examples/stp_rdo.png b/examples/stp_rdo.png new file mode 100644 index 0000000..00e7436 Binary files /dev/null and b/examples/stp_rdo.png differ diff --git a/examples/waterfall.png b/examples/waterfall.png new file mode 100644 index 0000000..8c3e411 Binary files /dev/null and b/examples/waterfall.png differ diff --git a/examples/waterfall_delta.png b/examples/waterfall_delta.png new file mode 100644 index 0000000..99ad823 Binary files /dev/null and b/examples/waterfall_delta.png differ diff --git a/examples/waterfall_rdo.png b/examples/waterfall_rdo.png new file mode 100644 index 0000000..5d78081 Binary files /dev/null and b/examples/waterfall_rdo.png differ diff 
--git a/examples/xfiles_768.png b/examples/xfiles_768.png new file mode 100644 index 0000000..2eb0b91 Binary files /dev/null and b/examples/xfiles_768.png differ diff --git a/examples/xfiles_768_2_delta.png b/examples/xfiles_768_2_delta.png new file mode 100644 index 0000000..004a85b Binary files /dev/null and b/examples/xfiles_768_2_delta.png differ diff --git a/examples/xfiles_768_2_rdo.png b/examples/xfiles_768_2_rdo.png new file mode 100644 index 0000000..3f023a6 Binary files /dev/null and b/examples/xfiles_768_2_rdo.png differ diff --git a/examples/xfiles_768_delta.png b/examples/xfiles_768_delta.png new file mode 100644 index 0000000..83e1b4a Binary files /dev/null and b/examples/xfiles_768_delta.png differ diff --git a/examples/xfiles_768_rdo.png b/examples/xfiles_768_rdo.png new file mode 100644 index 0000000..423ffa8 Binary files /dev/null and b/examples/xfiles_768_rdo.png differ diff --git a/qoi.h b/qoi.h new file mode 100644 index 0000000..5583bad --- /dev/null +++ b/qoi.h @@ -0,0 +1,672 @@ +/* + +QOI - The "Quite OK Image" format for fast, lossless image compression + +Dominic Szablewski - https://phoboslab.org + + +-- LICENSE: The MIT License(MIT) + +Copyright(c) 2021 Dominic Szablewski + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files(the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions : +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +-- About + +QOI encodes and decodes images in a lossless format. Compared to stb_image and +stb_image_write QOI offers 20x-50x faster encoding, 3x-4x faster decoding and +20% better compression. + + +-- Synopsis + +// Define `QOI_IMPLEMENTATION` in *one* C/C++ file before including this +// library to create the implementation. + +#define QOI_IMPLEMENTATION +#include "qoi.h" + +// Encode and store an RGBA buffer to the file system. The qoi_desc describes +// the input pixel data. +qoi_write("image_new.qoi", rgba_pixels, &(qoi_desc){ + .width = 1920, + .height = 1080, + .channels = 4, + .colorspace = QOI_SRGB +}); + +// Load and decode a QOI image from the file system into a 32bbp RGBA buffer. +// The qoi_desc struct will be filled with the width, height, number of channels +// and colorspace read from the file header. +qoi_desc desc; +void *rgba_pixels = qoi_read("image.qoi", &desc, 4); + + + +-- Documentation + +This library provides the following functions; +- qoi_read -- read and decode a QOI file +- qoi_decode -- decode the raw bytes of a QOI image from memory +- qoi_write -- encode and write a QOI file +- qoi_encode -- encode an rgba buffer into a QOI image in memory + +See the function declaration below for the signature and more information. + +If you don't want/need the qoi_read and qoi_write functions, you can define +QOI_NO_STDIO before including this library. + +This library uses malloc() and free(). 
To supply your own malloc implementation +you can define QOI_MALLOC and QOI_FREE before including this library. + +This library uses memset() to zero-initialize the index. To supply your own +implementation you can define QOI_ZEROARR before including this library. + + +-- Data Format + +A QOI file has a 14 byte header, followed by any number of data "chunks" and an +8-byte end marker. + +struct qoi_header_t { + char magic[4]; // magic bytes "qoif" + uint32_t width; // image width in pixels (BE) + uint32_t height; // image height in pixels (BE) + uint8_t channels; // 3 = RGB, 4 = RGBA + uint8_t colorspace; // 0 = sRGB with linear alpha, 1 = all channels linear +}; + +Images are encoded row by row, left to right, top to bottom. The decoder and +encoder start with {r: 0, g: 0, b: 0, a: 255} as the previous pixel value. An +image is complete when all pixels specified by width * height have been covered. + +Pixels are encoded as + - a run of the previous pixel + - an index into an array of previously seen pixels + - a difference to the previous pixel value in r,g,b + - full r,g,b or r,g,b,a values + +The color channels are assumed to not be premultiplied with the alpha channel +("un-premultiplied alpha"). + +A running array[64] (zero-initialized) of previously seen pixel values is +maintained by the encoder and decoder. Each pixel that is seen by the encoder +and decoder is put into this array at the position formed by a hash function of +the color value. In the encoder, if the pixel value at the index matches the +current pixel, this index position is written to the stream as QOI_OP_INDEX. +The hash function for the index is: + + index_position = (r * 3 + g * 5 + b * 7 + a * 11) % 64 + +Each chunk starts with a 2- or 8-bit tag, followed by a number of data bits. The +bit length of chunks is divisible by 8 - i.e. all chunks are byte aligned. All +values encoded in these data bits have the most significant bit on the left. 
+ +The 8-bit tags have precedence over the 2-bit tags. A decoder must check for the +presence of an 8-bit tag first. + +The byte stream's end is marked with 7 0x00 bytes followed a single 0x01 byte. + + +The possible chunks are: + + +.- QOI_OP_INDEX ----------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----------------| +| 0 0 | index | +`-------------------------` +2-bit tag b00 +6-bit index into the color index array: 0..63 + +A valid encoder must not issue 2 or more consecutive QOI_OP_INDEX chunks to the +same index. QOI_OP_RUN should be used instead. + + +.- QOI_OP_DIFF -----------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----+-----+-----| +| 0 1 | dr | dg | db | +`-------------------------` +2-bit tag b01 +2-bit red channel difference from the previous pixel between -2..1 +2-bit green channel difference from the previous pixel between -2..1 +2-bit blue channel difference from the previous pixel between -2..1 + +The difference to the current channel values are using a wraparound operation, +so "1 - 2" will result in 255, while "255 + 1" will result in 0. + +Values are stored as unsigned integers with a bias of 2. E.g. -2 is stored as +0 (b00). 1 is stored as 3 (b11). + +The alpha value remains unchanged from the previous pixel. + + +.- QOI_OP_LUMA -------------------------------------. +| Byte[0] | Byte[1] | +| 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | +|-------+-----------------+-------------+-----------| +| 1 0 | green diff | dr - dg | db - dg | +`---------------------------------------------------` +2-bit tag b10 +6-bit green channel difference from the previous pixel -32..31 +4-bit red channel difference minus green channel difference -8..7 +4-bit blue channel difference minus green channel difference -8..7 + +The green channel is used to indicate the general direction of change and is +encoded in 6 bits. The red and blue channels (dr and db) base their diffs off +of the green channel difference and are encoded in 4 bits. 
I.e.: + dr_dg = (cur_px.r - prev_px.r) - (cur_px.g - prev_px.g) + db_dg = (cur_px.b - prev_px.b) - (cur_px.g - prev_px.g) + +The difference to the current channel values are using a wraparound operation, +so "10 - 13" will result in 253, while "250 + 7" will result in 1. + +Values are stored as unsigned integers with a bias of 32 for the green channel +and a bias of 8 for the red and blue channel. + +The alpha value remains unchanged from the previous pixel. + + +.- QOI_OP_RUN ------------. +| Byte[0] | +| 7 6 5 4 3 2 1 0 | +|-------+-----------------| +| 1 1 | run | +`-------------------------` +2-bit tag b11 +6-bit run-length repeating the previous pixel: 1..62 + +The run-length is stored with a bias of -1. Note that the run-lengths 63 and 64 +(b111110 and b111111) are illegal as they are occupied by the QOI_OP_RGB and +QOI_OP_RGBA tags. + + +.- QOI_OP_RGB ------------------------------------------. +| Byte[0] | Byte[1] | Byte[2] | Byte[3] | +| 7 6 5 4 3 2 1 0 | 7 .. 0 | 7 .. 0 | 7 .. 0 | +|-------------------------+---------+---------+---------| +| 1 1 1 1 1 1 1 0 | red | green | blue | +`-------------------------------------------------------` +8-bit tag b11111110 +8-bit red channel value +8-bit green channel value +8-bit blue channel value + +The alpha value remains unchanged from the previous pixel. + + +.- QOI_OP_RGBA ---------------------------------------------------. +| Byte[0] | Byte[1] | Byte[2] | Byte[3] | Byte[4] | +| 7 6 5 4 3 2 1 0 | 7 .. 0 | 7 .. 0 | 7 .. 0 | 7 .. 
0 | +|-------------------------+---------+---------+---------+---------| +| 1 1 1 1 1 1 1 1 | red | green | blue | alpha | +`-----------------------------------------------------------------` +8-bit tag b11111111 +8-bit red channel value +8-bit green channel value +8-bit blue channel value +8-bit alpha channel value + +*/ + + +/* ----------------------------------------------------------------------------- +Header - Public functions */ + +#ifndef QOI_H +#define QOI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* A pointer to a qoi_desc struct has to be supplied to all of qoi's functions. +It describes either the input format (for qoi_write and qoi_encode), or is +filled with the description read from the file header (for qoi_read and +qoi_decode). + +The colorspace in this qoi_desc is an enum where + 0 = sRGB, i.e. gamma scaled RGB channels and a linear alpha channel + 1 = all channels are linear +You may use the constants QOI_SRGB or QOI_LINEAR. The colorspace is purely +informative. It will be saved to the file header, but does not affect +how chunks are en-/decoded. */ + +#define QOI_SRGB 0 +#define QOI_LINEAR 1 + +typedef struct { + unsigned int width; + unsigned int height; + unsigned char channels; + unsigned char colorspace; +} qoi_desc; + +#ifndef QOI_NO_STDIO + +/* Encode raw RGB or RGBA pixels into a QOI image and write it to the file +system. The qoi_desc struct must be filled with the image width, height, +number of channels (3 = RGB, 4 = RGBA) and the colorspace. + +The function returns 0 on failure (invalid parameters, or fopen or malloc +failed) or the number of bytes written on success. */ + +int qoi_write(const char *filename, const void *data, const qoi_desc *desc); + + +/* Read and decode a QOI image from the file system. If channels is 0, the +number of channels from the file header is used. If channels is 3 or 4 the +output format will be forced into this number of channels. 
+ +The function either returns NULL on failure (invalid data, or malloc or fopen +failed) or a pointer to the decoded pixels. On success, the qoi_desc struct +will be filled with the description from the file header. + +The returned pixel data should be free()d after use. */ + +void *qoi_read(const char *filename, qoi_desc *desc, int channels); + +#endif /* QOI_NO_STDIO */ + + +/* Encode raw RGB or RGBA pixels into a QOI image in memory. + +The function either returns NULL on failure (invalid parameters or malloc +failed) or a pointer to the encoded data on success. On success the out_len +is set to the size in bytes of the encoded data. + +The returned qoi data should be free()d after use. */ + +void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len); + + +/* Decode a QOI image from memory. + +The function either returns NULL on failure (invalid parameters or malloc +failed) or a pointer to the decoded pixels. On success, the qoi_desc struct +is filled with the description from the file header. + +The returned pixel data should be free()d after use. 
*/
+
+void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QOI_H */
+
+
+/* -----------------------------------------------------------------------------
+Implementation */
+
+#ifdef QOI_IMPLEMENTATION
+#include <stdlib.h> /* malloc, free (default QOI_MALLOC / QOI_FREE) */
+#include <string.h> /* memset (default QOI_ZEROARR) */
+
+#ifndef QOI_MALLOC
+	#define QOI_MALLOC(sz) malloc(sz)
+	#define QOI_FREE(p) free(p)
+#endif
+#ifndef QOI_ZEROARR
+	#define QOI_ZEROARR(a) memset((a),0,sizeof(a))
+#endif
+
+#define QOI_OP_INDEX 0x00 /* 00xxxxxx */
+#define QOI_OP_DIFF 0x40 /* 01xxxxxx */
+#define QOI_OP_LUMA 0x80 /* 10xxxxxx */
+#define QOI_OP_RUN 0xc0 /* 11xxxxxx */
+#define QOI_OP_RGB 0xfe /* 11111110 */
+#define QOI_OP_RGBA 0xff /* 11111111 */
+
+#define QOI_MASK_2 0xc0 /* 11000000 */
+
+#define QOI_COLOR_HASH(C) (C.rgba.r*3 + C.rgba.g*5 + C.rgba.b*7 + C.rgba.a*11)
+#define QOI_MAGIC \
+	(((unsigned int)'q') << 24 | ((unsigned int)'o') << 16 | \
+	((unsigned int)'i') << 8 | ((unsigned int)'f'))
+#define QOI_HEADER_SIZE 14
+
+/* 2GB is the max file size that this implementation can safely handle. We guard
+against anything larger than that, assuming the worst case with 5 bytes per
+pixel, rounded down to a nice clean value. 400 million pixels ought to be
+enough for anybody.
*/
+#define QOI_PIXELS_MAX ((unsigned int)400000000)
+
+typedef union {
+	struct { unsigned char r, g, b, a; } rgba;
+	unsigned int v;
+} qoi_rgba_t;
+
+static const unsigned char qoi_padding[8] = {0,0,0,0,0,0,0,1};
+
+/* Store v at bytes[*p] as 4 bytes, most significant first, and advance *p. */
+static void qoi_write_32(unsigned char *bytes, int *p, unsigned int v) {
+	bytes[(*p)++] = (0xff000000 & v) >> 24;
+	bytes[(*p)++] = (0x00ff0000 & v) >> 16;
+	bytes[(*p)++] = (0x0000ff00 & v) >> 8;
+	bytes[(*p)++] = (0x000000ff & v);
+}
+
+/* Read 4 bytes, most significant first, from bytes[*p] and advance *p. */
+static unsigned int qoi_read_32(const unsigned char *bytes, int *p) {
+	unsigned int a = bytes[(*p)++];
+	unsigned int b = bytes[(*p)++];
+	unsigned int c = bytes[(*p)++];
+	unsigned int d = bytes[(*p)++];
+	return a << 24 | b << 16 | c << 8 | d;
+}
+
+void *qoi_encode(const void *data, const qoi_desc *desc, int *out_len) {
+	int i, max_size, p, run;
+	int px_len, px_end, px_pos, channels;
+	unsigned char *bytes;
+	const unsigned char *pixels;
+	qoi_rgba_t index[64];
+	qoi_rgba_t px, px_prev;
+
+	if (
+		data == NULL || out_len == NULL || desc == NULL ||
+		desc->width == 0 || desc->height == 0 ||
+		desc->channels < 3 || desc->channels > 4 ||
+		desc->colorspace > 1 ||
+		desc->height >= QOI_PIXELS_MAX / desc->width
+	) {
+		return NULL;
+	}
+
+	/* Worst case: every pixel is emitted as a full chunk (tag byte plus
+	one byte per channel), plus header and end marker. */
+	max_size =
+		desc->width * desc->height * (desc->channels + 1) +
+		QOI_HEADER_SIZE + sizeof(qoi_padding);
+
+	p = 0;
+	bytes = (unsigned char *) QOI_MALLOC(max_size);
+	if (!bytes) {
+		return NULL;
+	}
+
+	qoi_write_32(bytes, &p, QOI_MAGIC);
+	qoi_write_32(bytes, &p, desc->width);
+	qoi_write_32(bytes, &p, desc->height);
+	bytes[p++] = desc->channels;
+	bytes[p++] = desc->colorspace;
+
+
+	pixels = (const unsigned char *)data;
+
+	QOI_ZEROARR(index);
+
+	run = 0;
+	px_prev.rgba.r = 0;
+	px_prev.rgba.g = 0;
+	px_prev.rgba.b = 0;
+	px_prev.rgba.a = 255;
+	px = px_prev;
+
+	px_len = desc->width * desc->height * desc->channels;
+	px_end = px_len - desc->channels;
+	channels = desc->channels;
+
+	for (px_pos = 0; px_pos < px_len; px_pos += channels) {
+		/* Load the pixel byte by byte. Reading it through a cast
+		qoi_rgba_t pointer assumed the caller's buffer is suitably aligned
+		for unsigned int, which a const void * argument does not guarantee.
+		With 3 channels alpha keeps its previous value (initially 255). */
+		px.rgba.r = pixels[px_pos + 0];
+		px.rgba.g = pixels[px_pos + 1];
+		px.rgba.b = pixels[px_pos + 2];
+
+		if (channels == 4) {
+			px.rgba.a = pixels[px_pos + 3];
+		}
+
+		if (px.v == px_prev.v) {
+			run++;
+			/* Flush at the maximum run length or on the last pixel. */
+			if (run == 62 || px_pos == px_end) {
+				bytes[p++] = QOI_OP_RUN | (run - 1);
+				run = 0;
+			}
+		}
+		else {
+			int index_pos;
+
+			if (run > 0) {
+				bytes[p++] = QOI_OP_RUN | (run - 1);
+				run = 0;
+			}
+
+			index_pos = QOI_COLOR_HASH(px) % 64;
+
+			if (index[index_pos].v == px.v) {
+				bytes[p++] = QOI_OP_INDEX | index_pos;
+			}
+			else {
+				index[index_pos] = px;
+
+				if (px.rgba.a == px_prev.rgba.a) {
+					signed char vr = px.rgba.r - px_prev.rgba.r;
+					signed char vg = px.rgba.g - px_prev.rgba.g;
+					signed char vb = px.rgba.b - px_prev.rgba.b;
+
+					signed char vg_r = vr - vg;
+					signed char vg_b = vb - vg;
+
+					if (
+						vr > -3 && vr < 2 &&
+						vg > -3 && vg < 2 &&
+						vb > -3 && vb < 2
+					) {
+						bytes[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2);
+					}
+					else if (
+						vg_r > -9 && vg_r < 8 &&
+						vg > -33 && vg < 32 &&
+						vg_b > -9 && vg_b < 8
+					) {
+						bytes[p++] = QOI_OP_LUMA | (vg + 32);
+						bytes[p++] = (vg_r + 8) << 4 | (vg_b + 8);
+					}
+					else {
+						bytes[p++] = QOI_OP_RGB;
+						bytes[p++] = px.rgba.r;
+						bytes[p++] = px.rgba.g;
+						bytes[p++] = px.rgba.b;
+					}
+				}
+				else {
+					bytes[p++] = QOI_OP_RGBA;
+					bytes[p++] = px.rgba.r;
+					bytes[p++] = px.rgba.g;
+					bytes[p++] = px.rgba.b;
+					bytes[p++] = px.rgba.a;
+				}
+			}
+		}
+		px_prev = px;
+	}
+
+	for (i = 0; i < (int)sizeof(qoi_padding); i++) {
+		bytes[p++] = qoi_padding[i];
+	}
+
+	*out_len = p;
+	return bytes;
+}
+
+void *qoi_decode(const void *data, int size, qoi_desc *desc, int channels) {
+	const unsigned char *bytes;
+	unsigned int header_magic;
+	unsigned char *pixels;
+	qoi_rgba_t index[64];
+	qoi_rgba_t px;
+	int px_len, chunks_len, px_pos;
+	int p = 0, run = 0;
+
+	if (
+		data == NULL || desc == NULL ||
+		(channels != 0 && channels != 3 && channels != 4) ||
+		size < QOI_HEADER_SIZE + (int)sizeof(qoi_padding)
+	) {
+		return NULL;
+	}
+
+	bytes =
(const unsigned char *)data; + + header_magic = qoi_read_32(bytes, &p); + desc->width = qoi_read_32(bytes, &p); + desc->height = qoi_read_32(bytes, &p); + desc->channels = bytes[p++]; + desc->colorspace = bytes[p++]; + + if ( + desc->width == 0 || desc->height == 0 || + desc->channels < 3 || desc->channels > 4 || + desc->colorspace > 1 || + header_magic != QOI_MAGIC || + desc->height >= QOI_PIXELS_MAX / desc->width + ) { + return NULL; + } + + if (channels == 0) { + channels = desc->channels; + } + + px_len = desc->width * desc->height * channels; + pixels = (unsigned char *) QOI_MALLOC(px_len); + if (!pixels) { + return NULL; + } + + QOI_ZEROARR(index); + px.rgba.r = 0; + px.rgba.g = 0; + px.rgba.b = 0; + px.rgba.a = 255; + + chunks_len = size - (int)sizeof(qoi_padding); + for (px_pos = 0; px_pos < px_len; px_pos += channels) { + + if (run > 0) { + run--; + } + else if (p < chunks_len) { + int b1 = bytes[p++]; + + if (b1 == QOI_OP_RGB) { + px.rgba.r = bytes[p++]; + px.rgba.g = bytes[p++]; + px.rgba.b = bytes[p++]; + } + else if (b1 == QOI_OP_RGBA) { + px.rgba.r = bytes[p++]; + px.rgba.g = bytes[p++]; + px.rgba.b = bytes[p++]; + px.rgba.a = bytes[p++]; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) { + px = index[b1]; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) { + px.rgba.r += ((b1 >> 4) & 0x03) - 2; + px.rgba.g += ((b1 >> 2) & 0x03) - 2; + px.rgba.b += ( b1 & 0x03) - 2; + } + else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) { + int b2 = bytes[p++]; + int vg = (b1 & 0x3f) - 32; + px.rgba.r += vg - 8 + ((b2 >> 4) & 0x0f); + px.rgba.g += vg; + px.rgba.b += vg - 8 + (b2 & 0x0f); + } + else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) { + run = (b1 & 0x3f); + } + + index[QOI_COLOR_HASH(px) % 64] = px; + } + + if (channels == 4) { + *(qoi_rgba_t*)(pixels + px_pos) = px; + } + else { + pixels[px_pos + 0] = px.rgba.r; + pixels[px_pos + 1] = px.rgba.g; + pixels[px_pos + 2] = px.rgba.b; + } + } + + return pixels; +} + +#ifndef QOI_NO_STDIO +#include + +int qoi_write(const char 
*filename, const void *data, const qoi_desc *desc) { + FILE *f = fopen(filename, "wb"); + int size; + void *encoded; + + if (!f) { + return 0; + } + + encoded = qoi_encode(data, desc, &size); + if (!encoded) { + fclose(f); + return 0; + } + + fwrite(encoded, 1, size, f); + fclose(f); + + QOI_FREE(encoded); + return size; +} + +void *qoi_read(const char *filename, qoi_desc *desc, int channels) { + FILE *f = fopen(filename, "rb"); + int size, bytes_read; + void *pixels, *data; + + if (!f) { + return NULL; + } + + fseek(f, 0, SEEK_END); + size = ftell(f); + if (size <= 0) { + fclose(f); + return NULL; + } + fseek(f, 0, SEEK_SET); + + data = QOI_MALLOC(size); + if (!data) { + fclose(f); + return NULL; + } + + bytes_read = (int)fread(data, 1, size, f); + fclose(f); + + pixels = qoi_decode(data, bytes_read, desc, channels); + QOI_FREE(data); + return pixels; +} + +#endif /* QOI_NO_STDIO */ +#endif /* QOI_IMPLEMENTATION */ diff --git a/qoirdo.cpp b/qoirdo.cpp new file mode 100644 index 0000000..9573594 --- /dev/null +++ b/qoirdo.cpp @@ -0,0 +1,1212 @@ +// qoirdo.cpp +// Copyright (C) 2022 Richard Geldreich, Jr. All Rights Reserved. +// Copyright (C) 2025 Erik Scholz +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "./qoirdo.hpp" + +#if _MSC_VER +// For sprintf(), strcpy() +#define _CRT_SECURE_NO_WARNINGS (1) +#endif + +#include +#include +#include +#include +#include +#include + +#include "./basisu.min.hpp" + +using namespace basisu; + +#define RDO_PNG_VERSION "v1.10" + +const float DEF_MAX_SMOOTH_STD_DEV = 35.0f; +const float DEF_SMOOTH_MAX_MSE_SCALE = 250.0f; +const float DEF_MAX_ULTRA_SMOOTH_STD_DEV = 5.0F; +const float DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 1500.0F; + +const float QOI_DEF_SMOOTH_MAX_MSE_SCALE = 2500.0f; +const float QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 5000.0f; + +enum speed_mode +{ + cNormalSpeed, + cFasterSpeed, + cFastestSpeed +}; + +struct rdo_png_params +{ + rdo_png_params() + { + clear(); + } + + void clear() + { + m_orig_img.clear(); + m_output_file_data.clear(); + m_lambda = 300.0f; + m_level = 0; + m_psnr = 0; + m_angular_rms_error = 0; + m_y_psnr = 0; + m_bpp = 0; + m_print_debug_output = false; + m_debug_images = false; + m_print_progress = false; + m_print_stats = false; + + m_use_chan_weights = false; + m_chan_weights[0] = 1; + m_chan_weights[1] = 1; + m_chan_weights[2] = 1; + m_chan_weights[3] = 1; + + { + float LW = 2; + float AW = 1.5; + float BW = 1; + float l = sqrtf(LW * LW + AW * AW + BW * BW); + LW /= l; + AW /= l; + BW /= l; + m_chan_weights_lab[0] = LW; // L + m_chan_weights_lab[1] = AW; // a + m_chan_weights_lab[2] = BW; // b + m_chan_weights_lab[3] = 1.5f; // alpha + } + + m_use_reject_thresholds = true; + m_reject_thresholds[0] = 32; + m_reject_thresholds[1] = 32; + m_reject_thresholds[2] = 32; + m_reject_thresholds[3] = 32; + + m_reject_thresholds_lab[0] = .05f; + //m_reject_thresholds_lab[1] = .075f; + m_reject_thresholds_lab[1] = .05f; + + m_transparent_reject_test = false; + + m_perceptual_error = true; + + m_match_only = false; + + m_two_pass = false; + + m_alpha_is_opacity = true; + + m_speed_mode = cFastestSpeed; + + m_max_smooth_std_dev = DEF_MAX_SMOOTH_STD_DEV; + m_smooth_max_mse_scale = DEF_SMOOTH_MAX_MSE_SCALE; 
+ m_max_ultra_smooth_std_dev = DEF_MAX_ULTRA_SMOOTH_STD_DEV; + m_ultra_smooth_max_mse_scale = DEF_ULTRA_SMOOTH_MAX_MSE_SCALE; + + m_no_mse_scaling = false; + } + + void print() + { + printf("orig image: %ux%u has alpha: %u\n", m_orig_img.get_width(), m_orig_img.get_height(), m_orig_img.has_alpha()); + printf("lambda: %f\n", m_lambda); + printf("level: %u\n", m_level); + printf("chan weights: %u %u %u %u\n", m_chan_weights[0], m_chan_weights[1], m_chan_weights[2], m_chan_weights[3]); + printf("use chan weights: %u\n", m_use_chan_weights); + printf("chan weights lab: %f %f %f %f\n", m_chan_weights_lab[0], m_chan_weights_lab[1], m_chan_weights_lab[2], m_chan_weights_lab[3]); + printf("reject thresholds: %u %u %u %u\n", m_reject_thresholds[0], m_reject_thresholds[1], m_reject_thresholds[2], m_reject_thresholds[3]); + printf("reject thresholds lab: %f %f\n", m_reject_thresholds_lab[0], m_reject_thresholds_lab[1]); + printf("use reject thresholds: %u\n", m_use_reject_thresholds); + printf("transparent reject test: %u\n", m_transparent_reject_test); + printf("print debug output: %u\n", m_print_debug_output); + printf("debug images: %u\n", m_debug_images); + printf("print progress: %u\n", m_print_progress); + printf("print stats: %u\n", m_print_stats); + printf("perceptual error: %u\n", m_perceptual_error); + printf("match only: %u\n", m_match_only); + printf("two pass: %u\n", m_two_pass); + printf("alpha is opacity: %u\n", m_alpha_is_opacity); + printf("speed mode: %u\n", (uint32_t)m_speed_mode); + printf("max smooth std dev: %f\n", m_max_smooth_std_dev); + printf("smooth max mse scale: %f\n", m_smooth_max_mse_scale); + printf("max ultra smooth std dev: %f\n", m_max_ultra_smooth_std_dev); + printf("ultra smooth max mse scale: %f\n", m_ultra_smooth_max_mse_scale); + printf("no MSE scaling: %u\n", m_no_mse_scaling); + } + + // TODO: results - move + float m_psnr; + float m_angular_rms_error; + float m_y_psnr; + float m_bpp; + + // This is the output image data, but note for 
// Running accumulator over a stream of unsigned samples: keeps the sample
// count, the sum and the sum of squares, from which the average, standard
// deviation and variance are derived on demand.
class tracked_stat
{
public:
	tracked_stat() { clear(); }

	// Forgets all samples seen so far.
	void clear() { m_num = 0; m_total = 0; m_total2 = 0; }

	// Adds one sample. The square is formed in 64 bits so samples above
	// 65535 do not wrap before they reach the 64-bit accumulator.
	void update(uint32_t val)
	{
		m_num++;
		m_total += val;
		m_total2 += (uint64_t)val * val;
	}

	tracked_stat& operator += (uint32_t val) { update(val); return *this; }

	uint32_t get_number_of_values() const { return m_num; }
	uint64_t get_total() const { return m_total; }
	uint64_t get_total2() const { return m_total2; }

	// Mean of the samples, or 0 when no sample has been added.
	float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }

	// Population standard deviation, sqrt(n * sum(x^2) - sum(x)^2) / n,
	// or 0 when no sample has been added.
	float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; }

	// Square of get_std_dev().
	float get_variance() const { float s = get_std_dev(); return s * s; }

private:
	uint32_t m_num;    // number of samples
	uint64_t m_total;  // sum of samples
	uint64_t m_total2; // sum of squared samples
};
Lab srgb_to_oklab_norm(const color_rgba& c) +{ + const Lab16& l = g_srgb_to_oklab16[c.r + c.g * 256 + c.b * 65536]; + + Lab res; + res.L = l.m_L * SCALE_L; + res.a = l.m_a * SCALE_L; + res.b = l.m_b * SCALE_L; + + return res; +} + +static void init_oklab_table(const char *pExec, bool quiet, bool caching_enabled) +{ + g_srgb_to_oklab16.resize(256 * 256 * 256); + + for (uint32_t r = 0; r <= 255; r++) + { + for (uint32_t g = 0; g <= 255; g++) + { + for (uint32_t b = 0; b <= 255; b++) + { + color_rgba c(r, g, b, 255); + Lab l(linear_srgb_to_oklab({ g_srgb_to_linear[c.r], g_srgb_to_linear[c.g], g_srgb_to_linear[c.b] })); + + assert(l.L >= MIN_L && l.L <= MAX_L); + assert(l.a >= MIN_A && l.a <= MAX_A); + assert(l.b >= MIN_B && l.b <= MAX_B); + + float lL = std::round(((l.L - MIN_L) / (MAX_L - MIN_L)) * 65535.0f); + float la = std::round(((l.a - MIN_A) / (MAX_A - MIN_A)) * 65535.0f); + float lb = std::round(((l.b - MIN_B) / (MAX_B - MIN_B)) * 65535.0f); + + lL = clamp(lL, 0.0f, 65535.0f); + la = clamp(la, 0.0f, 65535.0f); + lb = clamp(lb, 0.0f, 65535.0f); + + Lab16& v = g_srgb_to_oklab16[r + g * 256 + b * 65536]; + v.m_L = (uint16_t)lL; + v.m_a = (uint16_t)la; + v.m_b = (uint16_t)lb; + } + } + } +} + +static inline float compute_se(const color_rgba& a, const color_rgba& orig, uint32_t num_comps, const rdo_png_params ¶ms) +{ + float dist; + + if (params.m_perceptual_error) + { + Lab la = srgb_to_oklab_norm(a); + Lab lb = srgb_to_oklab_norm(orig); + + la.L -= lb.L; + la.a -= lb.a; + la.b -= lb.b; + + float L_d = la.L * la.L; + float a_d = la.a * la.a; + float b_d = la.b * la.b; + + L_d *= params.m_chan_weights_lab[0]; + a_d *= params.m_chan_weights_lab[1]; + b_d *= params.m_chan_weights_lab[2]; + + dist = L_d + a_d + b_d; + + // TODO: Scales the error to bring it into a range where lambda will be roughly comparable to plain MSE. 
+ const float NORM_ERROR_SCALE = 350000.0f; + dist *= NORM_ERROR_SCALE; + + if (num_comps == 4) + { + int da = (int)a[3] - (int)orig[3]; + dist += params.m_chan_weights_lab[3] * square((float)da); + } + } + else if (params.m_use_chan_weights) + { + int dr = (int)a[0] - (int)orig[0]; + int dg = (int)a[1] - (int)orig[1]; + int db = (int)a[2] - (int)orig[2]; + + uint32_t idist = (uint32_t)(params.m_chan_weights[0] * (uint32_t)(dr * dr) + params.m_chan_weights[1] * (uint32_t)(dg * dg) + params.m_chan_weights[2] * (uint32_t)(db * db)); + if (num_comps == 4) + { + int da = (int)a[3] - (int)orig[3]; + idist += params.m_chan_weights[3] * (uint32_t)(da * da); + } + + dist = (float)idist; + } + else + { + int dr = (int)a[0] - (int)orig[0]; + int dg = (int)a[1] - (int)orig[1]; + int db = (int)a[2] - (int)orig[2]; + + uint32_t idist = (uint32_t)(dr * dr + dg * dg + db * db); + if (num_comps == 4) + { + int da = (int)a[3] - (int)orig[3]; + idist += da * da; + } + + dist = (float)idist; + } + + return dist; +} + +static inline bool should_reject(const color_rgba& trial_color, const color_rgba& orig_color, uint32_t num_comps, const rdo_png_params& params) +{ + if ((params.m_transparent_reject_test) && (num_comps == 4)) + { + if ((orig_color[3] == 0) && (trial_color[3] > 0)) + return true; + + if ((orig_color[3] == 255) && (trial_color[3] < 255)) + return true; + } + + if (params.m_use_reject_thresholds) + { + if (params.m_perceptual_error) + { + Lab t(srgb_to_oklab_norm(trial_color)); + Lab o(srgb_to_oklab_norm(orig_color)); + + float L_diff = fabs(t.L - o.L); + + if (L_diff > params.m_reject_thresholds_lab[0]) + return true; + + float ab_dist = square(t.a - o.a) + square(t.b - o.b); + + if (ab_dist > (params.m_reject_thresholds_lab[1] * params.m_reject_thresholds_lab[1])) + return true; + + if (num_comps == 4) + { + uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]); + if (delta_a > params.m_reject_thresholds[3]) + return true; + } + } + else + { + uint32_t delta_r 
= abs((int)trial_color[0] - (int)orig_color[0]); + uint32_t delta_g = abs((int)trial_color[1] - (int)orig_color[1]); + uint32_t delta_b = abs((int)trial_color[2] - (int)orig_color[2]); + + if (delta_r > params.m_reject_thresholds[0]) + return true; + if (delta_g > params.m_reject_thresholds[1]) + return true; + if (delta_b > params.m_reject_thresholds[2]) + return true; + + if (num_comps == 4) + { + uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]); + if (delta_a > params.m_reject_thresholds[3]) + return true; + } + } + } + + return false; +} + +struct smooth_desc { + bool alpha_is_opacity {true}; + float max_smooth_std_dev {DEF_MAX_SMOOTH_STD_DEV}; + float smooth_max_mse_scale {QOI_DEF_SMOOTH_MAX_MSE_SCALE}; + float max_ultra_smooth_std_dev {DEF_MAX_ULTRA_SMOOTH_STD_DEV}; + float ultra_smooth_max_mse_scale {QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE}; +}; + +static void create_smooth_maps( + vector2D &smooth_block_mse_scales, + const image& orig_img, + const smooth_desc& desc +) { + const uint32_t width = orig_img.get_width(); + const uint32_t height = orig_img.get_height(); + const uint32_t total_pixels = orig_img.get_total_pixels(); + const bool has_alpha = orig_img.has_alpha(); + const uint32_t num_comps = has_alpha ? 
4 : 3; + +#if 0 + if (params.m_no_mse_scaling) + { + smooth_block_mse_scales.set_all(1.0f); + return; + } +#endif + + image smooth_vis(width, height); + image alpha_edge_vis(width, height); + image ultra_smooth_vis(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + float alpha_edge_yl = 0.0f; + if ((num_comps == 4) && (desc.alpha_is_opacity)) + { + tracked_stat alpha_comp_stats; + for (int yd = -3; yd <= 3; yd++) + { + for (int xd = -3; xd <= 3; xd++) + { + const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); + alpha_comp_stats.update(p[3]); + } + } + + float max_std_dev = alpha_comp_stats.get_std_dev(); + + float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f); + alpha_edge_yl = yl * yl; + } + + { + tracked_stat comp_stats[4]; + for (int yd = -1; yd <= 1; yd++) + { + for (int xd = -1; xd <= 1; xd++) + { + const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); + comp_stats[0].update(p[0]); + comp_stats[1].update(p[1]); + comp_stats[2].update(p[2]); + if (num_comps == 4) + comp_stats[3].update(p[3]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + + float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f); + yl = yl * yl; + + smooth_block_mse_scales(x, y) = lerp(desc.smooth_max_mse_scale, 1.0f, yl); + + if (num_comps == 4) + { + alpha_edge_vis(x, y).set((int)std::round(alpha_edge_yl * 255.0f)); + + smooth_block_mse_scales(x, y) = lerp(smooth_block_mse_scales(x, y), desc.smooth_max_mse_scale, alpha_edge_yl); + } + + smooth_vis(x, y).set(clamp((int)((smooth_block_mse_scales(x, y) - 1.0f) / (desc.smooth_max_mse_scale - 1.0f) * 255.0f + .5f), 0, 255)); + } + + { + tracked_stat comp_stats[4]; + + const int S = 5; + for (int yd = -S; yd < S; yd++) + { + for (int xd = -S; xd < S; xd++) + { + const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); 
+ comp_stats[0].update(p[0]); + comp_stats[1].update(p[1]); + comp_stats[2].update(p[2]); + if (num_comps == 4) + comp_stats[3].update(p[3]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + + float yl = clamp(max_std_dev / desc.max_ultra_smooth_std_dev, 0.0f, 1.0f); + yl = powf(yl, 3.0f); + + smooth_block_mse_scales(x, y) = lerp(desc.ultra_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl); + + ultra_smooth_vis(x, y).set((int)std::round(yl * 255.0f)); + } + + } + } + +#if 0 + if (params.m_debug_images) + { + save_png("dbg_smooth_vis.png", smooth_vis); + save_png("dbg_alpha_edge_vis.png", alpha_edge_vis); + save_png("dbg_ultra_smooth_vis.png", ultra_smooth_vis); + } +#endif +} + +#pragma pack(push, 1) +struct qoi_header +{ + char magic[4]; // magic bytes "qoif" + uint32_t width; // image width in pixels (BE) + uint32_t height; // image height in pixels (BE) + uint8_t channels; // 3 = RGB, 4 = RGBA + uint8_t colorspace; // 0 = sRGB with linear alpha 1 = all channels linear +}; +#pragma pack(pop) + +static bool encode_rdo_qoi( + const image& orig_img, + std::vector& data, + //const rdo_png_params& params, + const vector2D& smooth_block_mse_scales, + float lambda) +{ + // This function wasn't designed to deal with lambda=0, so nudge it up. + lambda = max(lambda, .0000125f); + + const rdo_png_params params{}; + + const bool has_alpha = orig_img.has_alpha(); + uint32_t num_comps = has_alpha ? 4 : 3; + + color_rgba hash[64]; + //clear_obj(hash); + memset(&hash, 0, sizeof(hash)); + + data.resize(0); + + qoi_header hdr; + memcpy(hdr.magic, "qoif", 4); + hdr.width = byteswap_32(orig_img.get_width()); + hdr.height = byteswap_32(orig_img.get_height()); + hdr.channels = has_alpha ? 
4 : 3; + hdr.colorspace = 0; + data.resize(sizeof(hdr)); + memcpy(data.data(), &hdr, sizeof(hdr)); + + int prev_r = 0, prev_g = 0, prev_b = 0, prev_a = 255; + uint32_t cur_run_len = 0; + + enum commands_t + { + cRUN, + cIDX, + cDELTA, + cLUMA, + cRGB, + cRGBA, + }; + + uint32_t total_run = 0, total_rgb = 0, total_rgba = 0, total_index = 0, total_delta = 0, total_luma = 0, total_run_pixels = 0; + + for (uint32_t y = 0; y < orig_img.get_height(); y++) + { + for (uint32_t x = 0; x < orig_img.get_width(); x++) + { + const color_rgba& c = orig_img(x, y); + const float mse_scale = smooth_block_mse_scales(x, y); + + float best_mse = 0.0f; + float best_bits = 40.0f; + float best_t = best_mse + best_bits * lambda; + int best_command = cRGBA; + int best_index = 0, best_dr = 0, best_dg = 0, best_db = 0; + + { + color_rgba trial_c(c.r, c.g, c.b, prev_a); + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = 32.0f; + float trial_t = mse_scale * mse + bits * lambda; + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cRGB; + } + } + } + + { + color_rgba trial_c(prev_r, prev_g, prev_b, prev_a); + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = cur_run_len ? 0 : 8.0f; + float trial_t = mse_scale * mse + bits * lambda; + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cRUN; + + if (best_mse == 0.0f) + { + cur_run_len++; + if (cur_run_len == 62) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a); + + continue; + } + } + } + } + + if (8.0f * lambda < best_t) + { + uint32_t hash_idx = (c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63; + + // First try the INDEX command losslessly. 
+ if (c == hash[hash_idx]) + { + float bits = 8.0f; + float trial_t = bits * lambda; + + assert(trial_t < best_t); + + best_mse = 0.0f; + best_bits = bits; + best_t = trial_t; + best_command = cIDX; + best_index = hash_idx; + } + else + { + // Try a lossy INDEX command. + for (uint32_t i = 0; i < 64; i++) + { + if (!should_reject(hash[i], c, 4, params)) + { + float mse = compute_se(hash[i], c, 4, params); + float bits = 8.0f; + float trial_t = mse_scale * mse + bits * lambda; + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cIDX; + best_index = i; + } + } + } + } + } + + if (8.0f * lambda < best_t) + { + bool delta_encodable_losslessly = false; + + // First try the DELTA command losslessly. + if (c.a == prev_a) + { + int dr = ((int)c.r - prev_r + 2) & 255; + int dg = ((int)c.g - prev_g + 2) & 255; + int db = ((int)c.b - prev_b + 2) & 255; + + if ((dr <= 3) && (dg <= 3) && (db <= 3)) + { + delta_encodable_losslessly = true; + + float bits = 8.0f; + float trial_t = bits * lambda; + + assert(trial_t < best_t); + + best_mse = 0.0f; + best_bits = bits; + best_t = trial_t; + best_command = cDELTA; + best_dr = dr - 2; + best_dg = dg - 2; + best_db = db - 2; + } + } + + // Try a lossy DELTA command. 
+ if (!delta_encodable_losslessly) + { + for (uint32_t i = 0; i < 64; i++) + { + int dr = ((i >> 4) & 3) - 2; + int dg = ((i >> 2) & 3) - 2; + int db = (i & 3) - 2; + + color_rgba trial_c((prev_r + dr) & 255, (prev_g + dg) & 255, (prev_b + db) & 255, prev_a); + + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = 8.0f; + float trial_t = mse_scale * mse + bits * lambda; + + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cDELTA; + best_dr = dr; + best_dg = dg; + best_db = db; + } + } + } + } + } + + if (16.0f * lambda < best_t) + { + bool luma_encodable_losslessly_in_rgb = false; + + // First try the LUMA command losslessly in RGB (may not be lossy in alpha). + { + int g_diff = (int)c.g - prev_g; + + int dg = (g_diff + 32) & 255; + + int dr = (((int)c.r - prev_r) - g_diff + 8) & 255; + int db = (((int)c.b - prev_b) - g_diff + 8) & 255; + + if ((dg <= 63) && (dr <= 15) && (db <= 15)) + { + luma_encodable_losslessly_in_rgb = true; + + color_rgba trial_c(c.r, c.g, c.b, prev_a); + + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = 16.0f; + float trial_t = mse_scale * mse + bits * lambda; + + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cLUMA; + best_dr = dr - 8; + best_dg = dg - 32; + best_db = db - 8; + } + } + } + } + + // If we can't use it losslessly, try it lossy. + if ((!luma_encodable_losslessly_in_rgb) && (params.m_speed_mode != cFastestSpeed)) + { + if (params.m_speed_mode == cNormalSpeed) + { + // Search all encodable LUMA commands. 
+ for (uint32_t i = 0; i < 16384; i++) + { + int dr = ((i >> 6) & 15) - 8; + int dg = (i & 63) - 32; + int db = ((i >> 10) & 15) - 8; + + color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a); + + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = 16.0f; + float trial_t = mse_scale * mse + bits * lambda; + + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cLUMA; + best_dr = dr; + best_dg = dg; + best_db = db; + } + } + } + } + else + { + // TODO: This isn't very smart. What if the G delta is encodable but R and/or B aren't? + const int g_deltas[] = { -24, -16, -14, -12, -10, -8, -6, -4, -3, -2, -1, 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 24 }; + const int TOTAL_G_DELTAS = sizeof(g_deltas) / sizeof(g_deltas[0]); + + for (int kg = 0; kg < TOTAL_G_DELTAS; kg++) + { + const int dg = g_deltas[kg]; + for (uint32_t i = 0; i < 256; i++) + { + int dr = (i & 15) - 8; + int db = ((i >> 4) & 15) - 8; + + color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a); + + if (!should_reject(trial_c, c, 4, params)) + { + float mse = compute_se(trial_c, c, 4, params); + float bits = 16.0f; + float trial_t = mse_scale * mse + bits * lambda; + + if (trial_t < best_t) + { + best_mse = mse; + best_bits = bits; + best_t = trial_t; + best_command = cLUMA; + best_dr = dr; + best_dg = dg; + best_db = db; + } + } + } + } + } + } + } + + switch (best_command) + { + case cRUN: + { + cur_run_len++; + if (cur_run_len == 62) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a); + + break; + } + case cRGB: + { + if (cur_run_len) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + 
+ total_run++; + } + + data.push_back(254); + data.push_back((uint8_t)c.r); + data.push_back((uint8_t)c.g); + data.push_back((uint8_t)c.b); + hash[(c.r * 3 + c.g * 5 + c.b * 7 + prev_a * 11) & 63].set(c.r, c.g, c.b, prev_a); + prev_r = c.r; + prev_g = c.g; + prev_b = c.b; + + total_rgb++; + + break; + } + case cRGBA: + { + if (cur_run_len) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + data.push_back(255); + data.push_back((uint8_t)c.r); + data.push_back((uint8_t)c.g); + data.push_back((uint8_t)c.b); + data.push_back((uint8_t)c.a); + hash[(c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63] = c; + prev_r = c.r; + prev_g = c.g; + prev_b = c.b; + prev_a = c.a; + + total_rgba++; + + break; + } + case cIDX: + { + if (cur_run_len) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + data.push_back(best_index); + + prev_r = hash[best_index].r; + prev_g = hash[best_index].g; + prev_b = hash[best_index].b; + prev_a = hash[best_index].a; + + total_index++; + + break; + } + case cDELTA: + { + if (cur_run_len) + { + total_run_pixels += cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + assert(best_dr >= -2 && best_dr <= 1); + assert(best_dg >= -2 && best_dg <= 1); + assert(best_db >= -2 && best_db <= 1); + + data.push_back(64 + ((best_dr + 2) << 4) + ((best_dg + 2) << 2) + (best_db + 2)); + + uint32_t decoded_r = (prev_r + best_dr) & 0xFF; + uint32_t decoded_g = (prev_g + best_dg) & 0xFF; + uint32_t decoded_b = (prev_b + best_db) & 0xFF; + uint32_t decoded_a = prev_a; + + hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a); + + prev_r = decoded_r; + prev_g = decoded_g; + prev_b = decoded_b; + prev_a = decoded_a; + + total_delta++; + + break; + } + case cLUMA: + { + if (cur_run_len) + { + total_run_pixels 
+= cur_run_len; + + data.push_back(0xC0 | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + assert(best_dr >= -8 && best_dr <= 7); + assert(best_dg >= -32 && best_dg <= 31); + assert(best_db >= -8 && best_db <= 7); + + data.push_back((uint8_t)(128 + (best_dg + 32))); + data.push_back((uint8_t)(((best_dr + 8) << 4) | (best_db + 8))); + + uint32_t decoded_r = (prev_r + best_dr + best_dg) & 0xFF; + uint32_t decoded_g = (prev_g + best_dg) & 0xFF; + uint32_t decoded_b = (prev_b + best_db + best_dg) & 0xFF; + uint32_t decoded_a = prev_a; + + hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a); + + prev_r = decoded_r; + prev_g = decoded_g; + prev_b = decoded_b; + prev_a = decoded_a; + + total_luma++; + + break; + } + default: + { + assert(0); + break; + } + } + + } + } + + if (cur_run_len) + { + total_run_pixels += cur_run_len; + + data.push_back((64 + 128) | (cur_run_len - 1)); + cur_run_len = 0; + + total_run++; + } + + // end padding + for (uint32_t i = 0; i < 7; i++) { + data.push_back(0); + } + data.push_back(1); + + if (params.m_print_stats) + { + printf("Totals: Run: %u, Run Pixels: %u %3.2f%%, RGB: %u %3.2f%%, RGBA: %u %3.2f%%, INDEX: %u %3.2f%%, DELTA: %u %3.2f%%, LUMA: %u %3.2f%%\n\n", + total_run, + total_run_pixels, (total_run_pixels * 100.0f) / orig_img.get_total_pixels(), + total_rgb, (total_rgb * 100.0f) / orig_img.get_total_pixels(), + total_rgba, (total_rgba * 100.0f) / orig_img.get_total_pixels(), + total_index, (total_index * 100.0f) / orig_img.get_total_pixels(), + total_delta, (total_delta * 100.0f) / orig_img.get_total_pixels(), + total_luma, (total_luma * 100.0f) / orig_img.get_total_pixels()); + } + + return true; +} + +static bool g_init {false}; + +bool init_qoi_rdo(void) { + if (g_init) { + return false; + } + init_srgb_to_linear(); + init_oklab_table("", true, false); + g_init = true; + return true; +} + +bool quit_qoi_rdo(void) { + if (!g_init) { + return 
false; + } + g_srgb_to_oklab16.clear(); + + return true; +} + +static float lambda_from_quality(int quality) { + quality = clamp(quality, 1, 100); + + // TODO: more stuff and log scale + //return lerp(50000, 100, quality/100.f); + //return lerp(250'000, 0, sqrtf(quality/100.f)); + //return lerp(1'000'000, 0, sqrtf(quality/100.f)); + //return lerp(1'000'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f)); + //return lerp(250'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f)); + return lerp(250'000, 0, cbrtf(quality/100.f)); +} + +std::vector encode_qoi_rdo_simple(const uint8_t* data, const qoi_rdo_desc& desc, int quality) { + if (!g_init) { + return {}; + } + + const float lambda = lambda_from_quality(quality); + + vector2D smooth_block_mse_scales(desc.width, desc.height); + + image orig_img(data, desc.width, desc.height, desc.channels); + + if (false /* m_no_mse_scaling */) { + smooth_block_mse_scales.set_all(1.0f); + } else { + create_smooth_maps( + smooth_block_mse_scales, + orig_img, + {} // smooth_desc + ); + } + + std::vector output_data; + + if (!encode_rdo_qoi( + orig_img, + output_data, + smooth_block_mse_scales, + lambda)) + { + return {}; + } + + return output_data; +} + diff --git a/qoirdo.hpp b/qoirdo.hpp new file mode 100644 index 0000000..5b4c64d --- /dev/null +++ b/qoirdo.hpp @@ -0,0 +1,35 @@ +#pragma once + +// qoirdo.hpp +// Copyright (C) 2022 Richard Geldreich, Jr. All Rights Reserved. +// Copyright (C) 2025 Erik Scholz +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <vector>
+
+bool init_qoi_rdo(void); // one-time setup before encoding; returns false if already initialised
+bool quit_qoi_rdo(void); // counterpart of init_qoi_rdo(); returns false if not initialised
+
+struct qoi_rdo_desc {
+	unsigned int width; // image dimensions, forwarded to the encoder together with data
+	unsigned int height;
+	unsigned char channels; // channel count of data; the bundled tool always passes 4
+	unsigned char colorspace; // NOTE(review): encode_qoi_rdo_simple() does not read this field -- confirm intent
+};
+
+// quality 1-100 (clamped); returns an empty vector on failure or when init_qoi_rdo() has not been called
+std::vector<uint8_t> encode_qoi_rdo_simple(const uint8_t* data, const qoi_rdo_desc& desc, int quality);
+
+// TODO: finetuneable
+//uint8_t* encode_qoi_rdo_advanced(const uint8_t* data, const qoi_rdo_desc* desc, int* out_len);
diff --git a/tool.cpp b/tool.cpp
new file mode 100644
index 0000000..815de3f
--- /dev/null
+++ b/tool.cpp
@@ -0,0 +1,164 @@
+#include "./qoirdo.hpp"
+
+#define QOI_IMPLEMENTATION
+#include "./qoi.h"
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+// Frees the buffer returned by qoi_decode() on every exit path of main().
+struct malloc_deleter {
+	void operator()(void* ptr) const noexcept { std::free(ptr); }
+};
+
+// Prints the command line synopsis; exe is the program name to show.
+void print_help(const char* exe) {
+	std::cout << exe << " [-q 1-100] <input.qoi>\n";
+}
+
+// read qoi image, reencode lossy with rdo
+// The result is written next to the input as "<name>.rdo.qoi".
+// Exit codes: 0 ok, -1 bad command line, -2 file I/O error, -3 decode/encode error.
+int main(int argc, const char** argv) {
+	if (argc < 2) {
+		std::cerr << "error: at least one paramenter required.\n";
+		std::cout << "help:\n";
+		print_help(argv[0]);
+		return -1;
+	}
+
+	std::filesystem::path input_qoi;
+
+	int quality = 80;
+
+	if (argv[1] == std::string_view{"-q"}) {
+		if (argc < 4) {
+			std::cerr << "error: more parameters required\n";
+			std::cout << "help:\n";
+			print_help(argv[0]);
+			return -1;
+		}
+
+		quality = std::atoi(argv[2]);
+		if (quality < 1 || quality > 100) {
+			std::cerr << "error: invalid quality\n";
+			std::cout << "help:\n";
+			print_help(argv[0]);
+			return -1;
+		}
+
+		input_qoi = argv[3];
+	} else {
+		input_qoi = argv[1];
+	}
+
+	// "foo.qoi" / "foo.QOI" -> "foo.rdo.qoi", any other name gets ".rdo.qoi" appended
+	std::filesystem::path output_qoi = input_qoi;
+	if (input_qoi.extension() == ".qoi" || input_qoi.extension() == ".QOI") {
+		output_qoi.replace_extension("rdo.qoi");
+	} else {
+		output_qoi.replace_filename(input_qoi.filename().generic_u8string() + std::string{".rdo.qoi"});
+	}
+
+	std::cout << "input_qoi: " << input_qoi.generic_u8string() << "\n";
+	std::cout << "output_qoi: " << output_qoi.generic_u8string() << "\n";
+	std::cout << "quality: " << quality << "\n";
+
+	std::vector<uint8_t> input_encoded_data;
+	{ // read file
+		std::ifstream ifile{input_qoi, std::ios::in | std::ios::binary};
+		if (!ifile.is_open()) {
+			std::cerr << "failed to open file " << input_qoi << "\n";
+			return -2;
+		}
+		ifile.seekg(0, std::ios_base::end);
+		const std::streamoff size = ifile.tellg();
+		if (size <= 0) {
+			std::cerr << "failed to open file " << input_qoi << ", file too small\n";
+			return -2;
+		}
+		if (size > INT_MAX) { // qoi_decode() takes the buffer size as an int
+			std::cerr << "failed to open file " << input_qoi << ", file too large\n";
+			return -2;
+		}
+		ifile.seekg(0, std::ios_base::beg);
+		input_encoded_data.resize(static_cast<std::size_t>(size));
+		ifile.read(reinterpret_cast<char*>(input_encoded_data.data()), static_cast<std::streamsize>(size));
+		if (!ifile) { // short read or I/O error: do not decode a partially filled buffer
+			std::cerr << "failed to read file " << input_qoi << "\n";
+			return -2;
+		}
+	}
+	const std::size_t input_file_size = input_encoded_data.size();
+
+	// decode, always to 4 channels
+
+	qoi_desc input_desc{};
+	std::unique_ptr<uint8_t, malloc_deleter> raw_image{static_cast<uint8_t*>(
+		qoi_decode(input_encoded_data.data(), static_cast<int>(input_encoded_data.size()), &input_desc, 4)
+	)};
+
+	if (raw_image == nullptr) {
+		std::cerr << "failed to decode input\n";
+		return -3;
+	}
+	if (input_desc.width == 0 || input_desc.height == 0) {
+		std::cerr << "funny trying to decode input\n";
+		return -3;
+	}
+
+	init_qoi_rdo();
+
+	// encode with rdo
+
+	const qoi_rdo_desc desc{
+		input_desc.width,
+		input_desc.height,
+		/*input_desc.channels*/ 4, // ? (4 is what qoi_decode() was asked to produce above)
+		input_desc.colorspace,
+	};
+	const std::vector<uint8_t> encoded_data = encode_qoi_rdo_simple(raw_image.get(), desc, quality);
+	raw_image.reset(); // decoded pixels are no longer needed
+
+	quit_qoi_rdo();
+
+	if (encoded_data.empty()) {
+		std::cerr << "failed to encode image\n";
+		return -3;
+	}
+	if (encoded_data.size() < 4) {
+		std::cout << "warn: encoded image suspiciously small\n";
+	}
+
+	{ // write out
+		std::ofstream ofile{output_qoi, std::ios::out | std::ios::binary | std::ios::trunc};
+
+		if (!ofile.is_open()) {
+			std::cerr << "failed to open output file " << output_qoi << "\n";
+			return -2;
+		}
+
+		ofile.write(reinterpret_cast<const char*>(encoded_data.data()), static_cast<std::streamsize>(encoded_data.size()));
+		ofile.flush();
+		if (!ofile) { // do not report success below when the data did not make it out
+			std::cerr << "failed to write output file " << output_qoi << "\n";
+			return -2;
+		}
+	}
+
+	std::cout << "written " << encoded_data.size() << " bytes to " << output_qoi << ". input was " << input_file_size << "\n";
+
+	// TODO: metrics
+
+	return 0;
+}
+