// qoirdo.cpp // Copyright (C) 2022 Richard Geldreich, Jr. All Rights Reserved. // Copyright (C) 2025 Erik Scholz // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "./qoirdo.hpp" #if _MSC_VER // For sprintf(), strcpy() #define _CRT_SECURE_NO_WARNINGS (1) #endif #include #include #include #include #include #include #include "./basisu.min.hpp" using namespace basisu; #define RDO_PNG_VERSION "v1.10" const float DEF_MAX_SMOOTH_STD_DEV = 35.0f; const float DEF_SMOOTH_MAX_MSE_SCALE = 250.0f; const float DEF_MAX_ULTRA_SMOOTH_STD_DEV = 5.0F; const float DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 1500.0F; const float QOI_DEF_SMOOTH_MAX_MSE_SCALE = 2500.0f; const float QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 5000.0f; enum speed_mode { cNormalSpeed, cFasterSpeed, cFastestSpeed }; struct rdo_png_params { rdo_png_params() { clear(); } void clear() { m_orig_img.clear(); m_output_file_data.clear(); m_lambda = 300.0f; m_level = 0; m_psnr = 0; m_angular_rms_error = 0; m_y_psnr = 0; m_bpp = 0; m_print_debug_output = false; m_debug_images = false; m_print_progress = false; m_print_stats = false; m_use_chan_weights = false; m_chan_weights[0] = 1; m_chan_weights[1] = 1; m_chan_weights[2] = 1; m_chan_weights[3] = 1; { float LW = 2; float AW = 1.5; float BW = 1; float l = sqrtf(LW * LW + AW * AW + BW * BW); LW /= l; AW /= l; BW /= l; m_chan_weights_lab[0] = LW; // L m_chan_weights_lab[1] = AW; // a m_chan_weights_lab[2] = BW; // b m_chan_weights_lab[3] = 1.5f; // alpha } m_use_reject_thresholds = true; m_reject_thresholds[0] = 32; m_reject_thresholds[1] = 32; m_reject_thresholds[2] = 32; m_reject_thresholds[3] = 32; m_reject_thresholds_lab[0] = .05f; //m_reject_thresholds_lab[1] = .075f; m_reject_thresholds_lab[1] = .05f; m_transparent_reject_test = false; m_perceptual_error = true; m_match_only = false; m_two_pass = false; m_alpha_is_opacity = true; m_speed_mode = cFastestSpeed; m_max_smooth_std_dev = DEF_MAX_SMOOTH_STD_DEV; m_smooth_max_mse_scale = DEF_SMOOTH_MAX_MSE_SCALE; m_max_ultra_smooth_std_dev = DEF_MAX_ULTRA_SMOOTH_STD_DEV; m_ultra_smooth_max_mse_scale = DEF_ULTRA_SMOOTH_MAX_MSE_SCALE; m_no_mse_scaling = false; } void print() { printf("orig image: %ux%u has alpha: %u\n", m_orig_img.get_width(), m_orig_img.get_height(), m_orig_img.has_alpha()); printf("lambda: %f\n", m_lambda); printf("level: %u\n", m_level); printf("chan weights: %u %u %u %u\n", m_chan_weights[0], m_chan_weights[1], m_chan_weights[2], m_chan_weights[3]); printf("use chan weights: %u\n", m_use_chan_weights); printf("chan weights lab: %f %f %f %f\n", m_chan_weights_lab[0], m_chan_weights_lab[1], m_chan_weights_lab[2], m_chan_weights_lab[3]); printf("reject thresholds: %u %u %u %u\n", m_reject_thresholds[0], m_reject_thresholds[1], m_reject_thresholds[2], m_reject_thresholds[3]); printf("reject thresholds lab: %f %f\n", m_reject_thresholds_lab[0], m_reject_thresholds_lab[1]); printf("use reject thresholds: %u\n", m_use_reject_thresholds); printf("transparent reject test: %u\n", m_transparent_reject_test); printf("print debug output: %u\n", m_print_debug_output); printf("debug images: %u\n", m_debug_images); printf("print progress: %u\n", m_print_progress); printf("print stats: %u\n", m_print_stats); printf("perceptual error: %u\n", m_perceptual_error); printf("match only: %u\n", m_match_only); printf("two pass: %u\n", m_two_pass); printf("alpha is opacity: %u\n", m_alpha_is_opacity); printf("speed mode: %u\n", (uint32_t)m_speed_mode); printf("max smooth std dev: %f\n", m_max_smooth_std_dev); printf("smooth max mse scale: %f\n", m_smooth_max_mse_scale); printf("max ultra smooth std dev: %f\n", m_max_ultra_smooth_std_dev); printf("ultra smooth max mse scale: %f\n", m_ultra_smooth_max_mse_scale); printf("no MSE scaling: %u\n", m_no_mse_scaling); } // TODO: results - move float m_psnr; float m_angular_rms_error; float m_y_psnr; float m_bpp; // This is the output image data, but note for PNG you can't save it at the right size without the scanline predictor values. image m_output_image; image m_orig_img; std::vector m_output_file_data; float m_lambda; uint32_t m_level; uint32_t m_chan_weights[4]; float m_chan_weights_lab[4]; bool m_use_chan_weights; uint32_t m_reject_thresholds[4]; float m_reject_thresholds_lab[2]; bool m_use_reject_thresholds; bool m_transparent_reject_test; bool m_print_debug_output; bool m_debug_images; bool m_print_progress; bool m_print_stats; bool m_perceptual_error; bool m_match_only; bool m_two_pass; bool m_alpha_is_opacity; speed_mode m_speed_mode; float m_max_smooth_std_dev; float m_smooth_max_mse_scale; float m_max_ultra_smooth_std_dev; float m_ultra_smooth_max_mse_scale; bool m_no_mse_scaling; }; static inline float square(float f) { return f * f; } static inline uint32_t byteswap_32(uint32_t v) { return ((v & 0xFF) << 24) | (((v >> 8) & 0xFF) << 16) | (((v >> 16) & 0xFF) << 8) | ((v >> 24) & 0xFF); } class tracked_stat { public: tracked_stat() { clear(); } inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; } inline void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } inline tracked_stat& operator += (uint32_t val) { update(val); return *this; } inline uint32_t get_number_of_values() { return m_num; } inline uint64_t get_total() const { return m_total; } inline uint64_t get_total2() const { return m_total2; } inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; inline float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } inline float get_variance() const { float s = get_std_dev(); return s * s; } private: uint32_t m_num; uint64_t m_total; uint64_t m_total2; }; struct Lab { float L; float a; float b; }; struct RGB { float r; float g; float b; }; static inline Lab linear_srgb_to_oklab(RGB c) { float l = 0.4122214708f * c.r + 0.5363325363f * c.g + 0.0514459929f * c.b; float m = 0.2119034982f * c.r + 0.6806995451f * c.g + 0.1073969566f * c.b; float s = 0.0883024619f * c.r + 0.2817188376f * c.g + 0.6299787005f * c.b; float l_ = std::cbrtf(l); float m_ = std::cbrtf(m); float s_ = std::cbrtf(s); return { 0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_, 1.9779984951f * l_ - 2.4285922050f * m_ + 0.4505937099f * s_, 0.0259040371f * l_ + 0.7827717662f * m_ - 0.8086757660f * s_, }; } static float g_srgb_to_linear[256]; static float f_inv(float x) { if (x <= 0.04045f) return x / 12.92f; else return powf(((x + 0.055f) / 1.055f), 2.4f); } static void init_srgb_to_linear() { for (uint32_t i = 0; i < 256; i++) g_srgb_to_linear[i] = f_inv(i / 255.0f); } #pragma pack(push, 1) struct Lab16 { uint16_t m_L, m_a, m_b; }; #pragma pack(pop) std::vector g_srgb_to_oklab16; const float SCALE_L = 1.0f / 65535.0f; const float SCALE_A = (1.0f / 65535.0f) * (0.276216f - (-0.233887f)); const float OFS_A = -0.233887f; const float SCALE_B = (1.0f / 65535.0f) * (0.198570f - (-0.311528f)); const float OFS_B = -0.311528f; const float MIN_L = 0.000000f, MAX_L = 1.000000f; const float MIN_A = -0.233888f, MAX_A = 0.276217f; const float MIN_B = -0.311529f, MAX_B = 0.198570f; static inline Lab srgb_to_oklab(const color_rgba &c) { const Lab16 &l = g_srgb_to_oklab16[c.r + c.g * 256 + c.b * 65536]; Lab res; res.L = l.m_L * SCALE_L; res.a = l.m_a * SCALE_A + OFS_A; res.b = l.m_b * SCALE_B + OFS_B; return res; } static inline Lab srgb_to_oklab_norm(const color_rgba& c) { const Lab16& l = g_srgb_to_oklab16[c.r + c.g * 256 + c.b * 65536]; Lab res; res.L = l.m_L * SCALE_L; res.a = l.m_a * SCALE_L; res.b = l.m_b * SCALE_L; return res; } static void init_oklab_table(const char *pExec, bool quiet, bool caching_enabled) { g_srgb_to_oklab16.resize(256 * 256 * 256); for (uint32_t r = 0; r <= 255; r++) { for (uint32_t g = 0; g <= 255; g++) { for (uint32_t b = 0; b <= 255; b++) { color_rgba c(r, g, b, 255); Lab l(linear_srgb_to_oklab({ g_srgb_to_linear[c.r], g_srgb_to_linear[c.g], g_srgb_to_linear[c.b] })); assert(l.L >= MIN_L && l.L <= MAX_L); assert(l.a >= MIN_A && l.a <= MAX_A); assert(l.b >= MIN_B && l.b <= MAX_B); float lL = std::round(((l.L - MIN_L) / (MAX_L - MIN_L)) * 65535.0f); float la = std::round(((l.a - MIN_A) / (MAX_A - MIN_A)) * 65535.0f); float lb = std::round(((l.b - MIN_B) / (MAX_B - MIN_B)) * 65535.0f); lL = clamp(lL, 0.0f, 65535.0f); la = clamp(la, 0.0f, 65535.0f); lb = clamp(lb, 0.0f, 65535.0f); Lab16& v = g_srgb_to_oklab16[r + g * 256 + b * 65536]; v.m_L = (uint16_t)lL; v.m_a = (uint16_t)la; v.m_b = (uint16_t)lb; } } } } static inline float compute_se(const color_rgba& a, const color_rgba& orig, uint32_t num_comps, const rdo_png_params ¶ms) { float dist; if (params.m_perceptual_error) { Lab la = srgb_to_oklab_norm(a); Lab lb = srgb_to_oklab_norm(orig); la.L -= lb.L; la.a -= lb.a; la.b -= lb.b; float L_d = la.L * la.L; float a_d = la.a * la.a; float b_d = la.b * la.b; L_d *= params.m_chan_weights_lab[0]; a_d *= params.m_chan_weights_lab[1]; b_d *= params.m_chan_weights_lab[2]; dist = L_d + a_d + b_d; // TODO: Scales the error to bring it into a range where lambda will be roughly comparable to plain MSE. const float NORM_ERROR_SCALE = 350000.0f; dist *= NORM_ERROR_SCALE; if (num_comps == 4) { int da = (int)a[3] - (int)orig[3]; dist += params.m_chan_weights_lab[3] * square((float)da); } } else if (params.m_use_chan_weights) { int dr = (int)a[0] - (int)orig[0]; int dg = (int)a[1] - (int)orig[1]; int db = (int)a[2] - (int)orig[2]; uint32_t idist = (uint32_t)(params.m_chan_weights[0] * (uint32_t)(dr * dr) + params.m_chan_weights[1] * (uint32_t)(dg * dg) + params.m_chan_weights[2] * (uint32_t)(db * db)); if (num_comps == 4) { int da = (int)a[3] - (int)orig[3]; idist += params.m_chan_weights[3] * (uint32_t)(da * da); } dist = (float)idist; } else { int dr = (int)a[0] - (int)orig[0]; int dg = (int)a[1] - (int)orig[1]; int db = (int)a[2] - (int)orig[2]; uint32_t idist = (uint32_t)(dr * dr + dg * dg + db * db); if (num_comps == 4) { int da = (int)a[3] - (int)orig[3]; idist += da * da; } dist = (float)idist; } return dist; } static inline bool should_reject(const color_rgba& trial_color, const color_rgba& orig_color, uint32_t num_comps, const rdo_png_params& params) { if ((params.m_transparent_reject_test) && (num_comps == 4)) { if ((orig_color[3] == 0) && (trial_color[3] > 0)) return true; if ((orig_color[3] == 255) && (trial_color[3] < 255)) return true; } if (params.m_use_reject_thresholds) { if (params.m_perceptual_error) { Lab t(srgb_to_oklab_norm(trial_color)); Lab o(srgb_to_oklab_norm(orig_color)); float L_diff = fabs(t.L - o.L); if (L_diff > params.m_reject_thresholds_lab[0]) return true; float ab_dist = square(t.a - o.a) + square(t.b - o.b); if (ab_dist > (params.m_reject_thresholds_lab[1] * params.m_reject_thresholds_lab[1])) return true; if (num_comps == 4) { uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]); if (delta_a > params.m_reject_thresholds[3]) return true; } } else { uint32_t delta_r = abs((int)trial_color[0] - (int)orig_color[0]); uint32_t delta_g = abs((int)trial_color[1] - (int)orig_color[1]); uint32_t delta_b = abs((int)trial_color[2] - (int)orig_color[2]); if (delta_r > params.m_reject_thresholds[0]) return true; if (delta_g > params.m_reject_thresholds[1]) return true; if (delta_b > params.m_reject_thresholds[2]) return true; if (num_comps == 4) { uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]); if (delta_a > params.m_reject_thresholds[3]) return true; } } } return false; } struct smooth_desc { bool alpha_is_opacity {true}; float max_smooth_std_dev {DEF_MAX_SMOOTH_STD_DEV}; float smooth_max_mse_scale {QOI_DEF_SMOOTH_MAX_MSE_SCALE}; float max_ultra_smooth_std_dev {DEF_MAX_ULTRA_SMOOTH_STD_DEV}; float ultra_smooth_max_mse_scale {QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE}; }; static void create_smooth_maps( vector2D &smooth_block_mse_scales, const image& orig_img, const smooth_desc& desc ) { const uint32_t width = orig_img.get_width(); const uint32_t height = orig_img.get_height(); const uint32_t total_pixels = orig_img.get_total_pixels(); const bool has_alpha = orig_img.has_alpha(); const uint32_t num_comps = has_alpha ? 4 : 3; #if 0 if (params.m_no_mse_scaling) { smooth_block_mse_scales.set_all(1.0f); return; } #endif image smooth_vis(width, height); image alpha_edge_vis(width, height); image ultra_smooth_vis(width, height); for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { float alpha_edge_yl = 0.0f; if ((num_comps == 4) && (desc.alpha_is_opacity)) { tracked_stat alpha_comp_stats; for (int yd = -3; yd <= 3; yd++) { for (int xd = -3; xd <= 3; xd++) { const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); alpha_comp_stats.update(p[3]); } } float max_std_dev = alpha_comp_stats.get_std_dev(); float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f); alpha_edge_yl = yl * yl; } { tracked_stat comp_stats[4]; for (int yd = -1; yd <= 1; yd++) { for (int xd = -1; xd <= 1; xd++) { const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); comp_stats[0].update(p[0]); comp_stats[1].update(p[1]); comp_stats[2].update(p[2]); if (num_comps == 4) comp_stats[3].update(p[3]); } } float max_std_dev = 0.0f; for (uint32_t i = 0; i < num_comps; i++) max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f); yl = yl * yl; smooth_block_mse_scales(x, y) = lerp(desc.smooth_max_mse_scale, 1.0f, yl); if (num_comps == 4) { alpha_edge_vis(x, y).set((int)std::round(alpha_edge_yl * 255.0f)); smooth_block_mse_scales(x, y) = lerp(smooth_block_mse_scales(x, y), desc.smooth_max_mse_scale, alpha_edge_yl); } smooth_vis(x, y).set(clamp((int)((smooth_block_mse_scales(x, y) - 1.0f) / (desc.smooth_max_mse_scale - 1.0f) * 255.0f + .5f), 0, 255)); } { tracked_stat comp_stats[4]; const int S = 5; for (int yd = -S; yd < S; yd++) { for (int xd = -S; xd < S; xd++) { const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd); comp_stats[0].update(p[0]); comp_stats[1].update(p[1]); comp_stats[2].update(p[2]); if (num_comps == 4) comp_stats[3].update(p[3]); } } float max_std_dev = 0.0f; for (uint32_t i = 0; i < num_comps; i++) max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); float yl = clamp(max_std_dev / desc.max_ultra_smooth_std_dev, 0.0f, 1.0f); yl = powf(yl, 3.0f); smooth_block_mse_scales(x, y) = lerp(desc.ultra_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl); ultra_smooth_vis(x, y).set((int)std::round(yl * 255.0f)); } } } #if 0 if (params.m_debug_images) { save_png("dbg_smooth_vis.png", smooth_vis); save_png("dbg_alpha_edge_vis.png", alpha_edge_vis); save_png("dbg_ultra_smooth_vis.png", ultra_smooth_vis); } #endif } #pragma pack(push, 1) struct qoi_header { char magic[4]; // magic bytes "qoif" uint32_t width; // image width in pixels (BE) uint32_t height; // image height in pixels (BE) uint8_t channels; // 3 = RGB, 4 = RGBA uint8_t colorspace; // 0 = sRGB with linear alpha 1 = all channels linear }; #pragma pack(pop) static bool encode_rdo_qoi( const image& orig_img, std::vector& data, //const rdo_png_params& params, const vector2D& smooth_block_mse_scales, float lambda) { // This function wasn't designed to deal with lambda=0, so nudge it up. lambda = max(lambda, .0000125f); const rdo_png_params params{}; const bool has_alpha = orig_img.has_alpha(); uint32_t num_comps = has_alpha ? 4 : 3; color_rgba hash[64]; //clear_obj(hash); memset(&hash, 0, sizeof(hash)); data.resize(0); qoi_header hdr; memcpy(hdr.magic, "qoif", 4); hdr.width = byteswap_32(orig_img.get_width()); hdr.height = byteswap_32(orig_img.get_height()); hdr.channels = has_alpha ? 4 : 3; hdr.colorspace = 0; data.resize(sizeof(hdr)); memcpy(data.data(), &hdr, sizeof(hdr)); int prev_r = 0, prev_g = 0, prev_b = 0, prev_a = 255; uint32_t cur_run_len = 0; enum commands_t { cRUN, cIDX, cDELTA, cLUMA, cRGB, cRGBA, }; uint32_t total_run = 0, total_rgb = 0, total_rgba = 0, total_index = 0, total_delta = 0, total_luma = 0, total_run_pixels = 0; for (uint32_t y = 0; y < orig_img.get_height(); y++) { for (uint32_t x = 0; x < orig_img.get_width(); x++) { const color_rgba& c = orig_img(x, y); const float mse_scale = smooth_block_mse_scales(x, y); float best_mse = 0.0f; float best_bits = 40.0f; float best_t = best_mse + best_bits * lambda; int best_command = cRGBA; int best_index = 0, best_dr = 0, best_dg = 0, best_db = 0; { color_rgba trial_c(c.r, c.g, c.b, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = 32.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cRGB; } } } { color_rgba trial_c(prev_r, prev_g, prev_b, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = cur_run_len ? 0 : 8.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cRUN; if (best_mse == 0.0f) { cur_run_len++; if (cur_run_len == 62) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a); continue; } } } } if (8.0f * lambda < best_t) { uint32_t hash_idx = (c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63; // First try the INDEX command losslessly. if (c == hash[hash_idx]) { float bits = 8.0f; float trial_t = bits * lambda; assert(trial_t < best_t); best_mse = 0.0f; best_bits = bits; best_t = trial_t; best_command = cIDX; best_index = hash_idx; } else { // Try a lossy INDEX command. for (uint32_t i = 0; i < 64; i++) { if (!should_reject(hash[i], c, 4, params)) { float mse = compute_se(hash[i], c, 4, params); float bits = 8.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cIDX; best_index = i; } } } } } if (8.0f * lambda < best_t) { bool delta_encodable_losslessly = false; // First try the DELTA command losslessly. if (c.a == prev_a) { int dr = ((int)c.r - prev_r + 2) & 255; int dg = ((int)c.g - prev_g + 2) & 255; int db = ((int)c.b - prev_b + 2) & 255; if ((dr <= 3) && (dg <= 3) && (db <= 3)) { delta_encodable_losslessly = true; float bits = 8.0f; float trial_t = bits * lambda; assert(trial_t < best_t); best_mse = 0.0f; best_bits = bits; best_t = trial_t; best_command = cDELTA; best_dr = dr - 2; best_dg = dg - 2; best_db = db - 2; } } // Try a lossy DELTA command. if (!delta_encodable_losslessly) { for (uint32_t i = 0; i < 64; i++) { int dr = ((i >> 4) & 3) - 2; int dg = ((i >> 2) & 3) - 2; int db = (i & 3) - 2; color_rgba trial_c((prev_r + dr) & 255, (prev_g + dg) & 255, (prev_b + db) & 255, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = 8.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cDELTA; best_dr = dr; best_dg = dg; best_db = db; } } } } } if (16.0f * lambda < best_t) { bool luma_encodable_losslessly_in_rgb = false; // First try the LUMA command losslessly in RGB (may not be lossy in alpha). { int g_diff = (int)c.g - prev_g; int dg = (g_diff + 32) & 255; int dr = (((int)c.r - prev_r) - g_diff + 8) & 255; int db = (((int)c.b - prev_b) - g_diff + 8) & 255; if ((dg <= 63) && (dr <= 15) && (db <= 15)) { luma_encodable_losslessly_in_rgb = true; color_rgba trial_c(c.r, c.g, c.b, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = 16.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cLUMA; best_dr = dr - 8; best_dg = dg - 32; best_db = db - 8; } } } } // If we can't use it losslessly, try it lossy. if ((!luma_encodable_losslessly_in_rgb) && (params.m_speed_mode != cFastestSpeed)) { if (params.m_speed_mode == cNormalSpeed) { // Search all encodable LUMA commands. for (uint32_t i = 0; i < 16384; i++) { int dr = ((i >> 6) & 15) - 8; int dg = (i & 63) - 32; int db = ((i >> 10) & 15) - 8; color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = 16.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cLUMA; best_dr = dr; best_dg = dg; best_db = db; } } } } else { // TODO: This isn't very smart. What if the G delta is encodable but R and/or B aren't? const int g_deltas[] = { -24, -16, -14, -12, -10, -8, -6, -4, -3, -2, -1, 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 24 }; const int TOTAL_G_DELTAS = sizeof(g_deltas) / sizeof(g_deltas[0]); for (int kg = 0; kg < TOTAL_G_DELTAS; kg++) { const int dg = g_deltas[kg]; for (uint32_t i = 0; i < 256; i++) { int dr = (i & 15) - 8; int db = ((i >> 4) & 15) - 8; color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a); if (!should_reject(trial_c, c, 4, params)) { float mse = compute_se(trial_c, c, 4, params); float bits = 16.0f; float trial_t = mse_scale * mse + bits * lambda; if (trial_t < best_t) { best_mse = mse; best_bits = bits; best_t = trial_t; best_command = cLUMA; best_dr = dr; best_dg = dg; best_db = db; } } } } } } } switch (best_command) { case cRUN: { cur_run_len++; if (cur_run_len == 62) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a); break; } case cRGB: { if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } data.push_back(254); data.push_back((uint8_t)c.r); data.push_back((uint8_t)c.g); data.push_back((uint8_t)c.b); hash[(c.r * 3 + c.g * 5 + c.b * 7 + prev_a * 11) & 63].set(c.r, c.g, c.b, prev_a); prev_r = c.r; prev_g = c.g; prev_b = c.b; total_rgb++; break; } case cRGBA: { if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } data.push_back(255); data.push_back((uint8_t)c.r); data.push_back((uint8_t)c.g); data.push_back((uint8_t)c.b); data.push_back((uint8_t)c.a); hash[(c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63] = c; prev_r = c.r; prev_g = c.g; prev_b = c.b; prev_a = c.a; total_rgba++; break; } case cIDX: { if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } data.push_back(best_index); prev_r = hash[best_index].r; prev_g = hash[best_index].g; prev_b = hash[best_index].b; prev_a = hash[best_index].a; total_index++; break; } case cDELTA: { if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } assert(best_dr >= -2 && best_dr <= 1); assert(best_dg >= -2 && best_dg <= 1); assert(best_db >= -2 && best_db <= 1); data.push_back(64 + ((best_dr + 2) << 4) + ((best_dg + 2) << 2) + (best_db + 2)); uint32_t decoded_r = (prev_r + best_dr) & 0xFF; uint32_t decoded_g = (prev_g + best_dg) & 0xFF; uint32_t decoded_b = (prev_b + best_db) & 0xFF; uint32_t decoded_a = prev_a; hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a); prev_r = decoded_r; prev_g = decoded_g; prev_b = decoded_b; prev_a = decoded_a; total_delta++; break; } case cLUMA: { if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back(0xC0 | (cur_run_len - 1)); cur_run_len = 0; total_run++; } assert(best_dr >= -8 && best_dr <= 7); assert(best_dg >= -32 && best_dg <= 31); assert(best_db >= -8 && best_db <= 7); data.push_back((uint8_t)(128 + (best_dg + 32))); data.push_back((uint8_t)(((best_dr + 8) << 4) | (best_db + 8))); uint32_t decoded_r = (prev_r + best_dr + best_dg) & 0xFF; uint32_t decoded_g = (prev_g + best_dg) & 0xFF; uint32_t decoded_b = (prev_b + best_db + best_dg) & 0xFF; uint32_t decoded_a = prev_a; hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a); prev_r = decoded_r; prev_g = decoded_g; prev_b = decoded_b; prev_a = decoded_a; total_luma++; break; } default: { assert(0); break; } } } } if (cur_run_len) { total_run_pixels += cur_run_len; data.push_back((64 + 128) | (cur_run_len - 1)); cur_run_len = 0; total_run++; } // end padding for (uint32_t i = 0; i < 7; i++) { data.push_back(0); } data.push_back(1); if (params.m_print_stats) { printf("Totals: Run: %u, Run Pixels: %u %3.2f%%, RGB: %u %3.2f%%, RGBA: %u %3.2f%%, INDEX: %u %3.2f%%, DELTA: %u %3.2f%%, LUMA: %u %3.2f%%\n\n", total_run, total_run_pixels, (total_run_pixels * 100.0f) / orig_img.get_total_pixels(), total_rgb, (total_rgb * 100.0f) / orig_img.get_total_pixels(), total_rgba, (total_rgba * 100.0f) / orig_img.get_total_pixels(), total_index, (total_index * 100.0f) / orig_img.get_total_pixels(), total_delta, (total_delta * 100.0f) / orig_img.get_total_pixels(), total_luma, (total_luma * 100.0f) / orig_img.get_total_pixels()); } return true; } static bool g_init {false}; bool init_qoi_rdo(void) { if (g_init) { return false; } init_srgb_to_linear(); init_oklab_table("", true, false); g_init = true; return true; } bool quit_qoi_rdo(void) { if (!g_init) { return false; } g_srgb_to_oklab16.clear(); return true; } static float lambda_from_quality(int quality) { quality = clamp(quality, 1, 100); // TODO: more stuff and log scale //return lerp(50000, 100, quality/100.f); //return lerp(250'000, 0, sqrtf(quality/100.f)); //return lerp(1'000'000, 0, sqrtf(quality/100.f)); //return lerp(1'000'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f)); //return lerp(250'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f)); return lerp(250'000, 0, cbrtf(quality/100.f)); } std::vector encode_qoi_rdo_simple(const uint8_t* data, const qoi_rdo_desc& desc, int quality) { if (!g_init) { return {}; } const float lambda = lambda_from_quality(quality); vector2D smooth_block_mse_scales(desc.width, desc.height); image orig_img(data, desc.width, desc.height, desc.channels); if (false /* m_no_mse_scaling */) { smooth_block_mse_scales.set_all(1.0f); } else { create_smooth_maps( smooth_block_mse_scales, orig_img, {} // smooth_desc ); } std::vector output_data; if (!encode_rdo_qoi( orig_img, output_data, smooth_block_mse_scales, lambda)) { return {}; } return output_data; }