tomato/qoirdo.cpp
Green Sky 1c189bfd9c Squashed 'external/libqoirdo/' content from commit 59f81203c9
git-subtree-dir: external/libqoirdo
git-subtree-split: 59f81203c99b2bd6edda0c84b98ba66a38f0e2c4
2025-05-12 20:44:00 +02:00

1213 lines
30 KiB
C++

// qoirdo.cpp
// Copyright (C) 2022 Richard Geldreich, Jr. All Rights Reserved.
// Copyright (C) 2025 Erik Scholz
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "./qoirdo.hpp"
#if _MSC_VER
// For sprintf(), strcpy()
#define _CRT_SECURE_NO_WARNINGS (1)
#endif
#include <cstdint>
#include <cstdio>
#include <cmath>
#include <string>
#include <array>
#include <vector>
#include "./basisu.min.hpp"
using namespace basisu;
#define RDO_PNG_VERSION "v1.10"

// Defaults used by rdo_png_params::clear() (and, for the two std-dev limits, by smooth_desc).
// A neighbourhood whose per-channel standard deviation is at or above the limit gets no
// extra MSE weight; one with zero deviation gets the full "max MSE scale".
constexpr float DEF_MAX_SMOOTH_STD_DEV = 35.0f;
constexpr float DEF_SMOOTH_MAX_MSE_SCALE = 250.0f;
constexpr float DEF_MAX_ULTRA_SMOOTH_STD_DEV = 5.0f;
constexpr float DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 1500.0f;

// Larger MSE scales used as the smooth_desc defaults on the QOI path.
constexpr float QOI_DEF_SMOOTH_MAX_MSE_SCALE = 2500.0f;
constexpr float QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 5000.0f;
// Search-effort presets. In encode_rdo_qoi they only affect the lossy LUMA search.
enum speed_mode
{
    cNormalSpeed = 0,  // try every encodable LUMA delta combination
    cFasterSpeed = 1,  // try a reduced set of green deltas
    cFastestSpeed = 2  // skip the lossy LUMA search entirely
};
struct rdo_png_params
{
rdo_png_params()
{
clear();
}
void clear()
{
m_orig_img.clear();
m_output_file_data.clear();
m_lambda = 300.0f;
m_level = 0;
m_psnr = 0;
m_angular_rms_error = 0;
m_y_psnr = 0;
m_bpp = 0;
m_print_debug_output = false;
m_debug_images = false;
m_print_progress = false;
m_print_stats = false;
m_use_chan_weights = false;
m_chan_weights[0] = 1;
m_chan_weights[1] = 1;
m_chan_weights[2] = 1;
m_chan_weights[3] = 1;
{
float LW = 2;
float AW = 1.5;
float BW = 1;
float l = sqrtf(LW * LW + AW * AW + BW * BW);
LW /= l;
AW /= l;
BW /= l;
m_chan_weights_lab[0] = LW; // L
m_chan_weights_lab[1] = AW; // a
m_chan_weights_lab[2] = BW; // b
m_chan_weights_lab[3] = 1.5f; // alpha
}
m_use_reject_thresholds = true;
m_reject_thresholds[0] = 32;
m_reject_thresholds[1] = 32;
m_reject_thresholds[2] = 32;
m_reject_thresholds[3] = 32;
m_reject_thresholds_lab[0] = .05f;
//m_reject_thresholds_lab[1] = .075f;
m_reject_thresholds_lab[1] = .05f;
m_transparent_reject_test = false;
m_perceptual_error = true;
m_match_only = false;
m_two_pass = false;
m_alpha_is_opacity = true;
m_speed_mode = cFastestSpeed;
m_max_smooth_std_dev = DEF_MAX_SMOOTH_STD_DEV;
m_smooth_max_mse_scale = DEF_SMOOTH_MAX_MSE_SCALE;
m_max_ultra_smooth_std_dev = DEF_MAX_ULTRA_SMOOTH_STD_DEV;
m_ultra_smooth_max_mse_scale = DEF_ULTRA_SMOOTH_MAX_MSE_SCALE;
m_no_mse_scaling = false;
}
void print()
{
printf("orig image: %ux%u has alpha: %u\n", m_orig_img.get_width(), m_orig_img.get_height(), m_orig_img.has_alpha());
printf("lambda: %f\n", m_lambda);
printf("level: %u\n", m_level);
printf("chan weights: %u %u %u %u\n", m_chan_weights[0], m_chan_weights[1], m_chan_weights[2], m_chan_weights[3]);
printf("use chan weights: %u\n", m_use_chan_weights);
printf("chan weights lab: %f %f %f %f\n", m_chan_weights_lab[0], m_chan_weights_lab[1], m_chan_weights_lab[2], m_chan_weights_lab[3]);
printf("reject thresholds: %u %u %u %u\n", m_reject_thresholds[0], m_reject_thresholds[1], m_reject_thresholds[2], m_reject_thresholds[3]);
printf("reject thresholds lab: %f %f\n", m_reject_thresholds_lab[0], m_reject_thresholds_lab[1]);
printf("use reject thresholds: %u\n", m_use_reject_thresholds);
printf("transparent reject test: %u\n", m_transparent_reject_test);
printf("print debug output: %u\n", m_print_debug_output);
printf("debug images: %u\n", m_debug_images);
printf("print progress: %u\n", m_print_progress);
printf("print stats: %u\n", m_print_stats);
printf("perceptual error: %u\n", m_perceptual_error);
printf("match only: %u\n", m_match_only);
printf("two pass: %u\n", m_two_pass);
printf("alpha is opacity: %u\n", m_alpha_is_opacity);
printf("speed mode: %u\n", (uint32_t)m_speed_mode);
printf("max smooth std dev: %f\n", m_max_smooth_std_dev);
printf("smooth max mse scale: %f\n", m_smooth_max_mse_scale);
printf("max ultra smooth std dev: %f\n", m_max_ultra_smooth_std_dev);
printf("ultra smooth max mse scale: %f\n", m_ultra_smooth_max_mse_scale);
printf("no MSE scaling: %u\n", m_no_mse_scaling);
}
// TODO: results - move
float m_psnr;
float m_angular_rms_error;
float m_y_psnr;
float m_bpp;
// This is the output image data, but note for PNG you can't save it at the right size without the scanline predictor values.
image m_output_image;
image m_orig_img;
std::vector<uint8_t> m_output_file_data;
float m_lambda;
uint32_t m_level;
uint32_t m_chan_weights[4];
float m_chan_weights_lab[4];
bool m_use_chan_weights;
uint32_t m_reject_thresholds[4];
float m_reject_thresholds_lab[2];
bool m_use_reject_thresholds;
bool m_transparent_reject_test;
bool m_print_debug_output;
bool m_debug_images;
bool m_print_progress;
bool m_print_stats;
bool m_perceptual_error;
bool m_match_only;
bool m_two_pass;
bool m_alpha_is_opacity;
speed_mode m_speed_mode;
float m_max_smooth_std_dev;
float m_smooth_max_mse_scale;
float m_max_ultra_smooth_std_dev;
float m_ultra_smooth_max_mse_scale;
bool m_no_mse_scaling;
};
// Returns the value multiplied by itself.
static inline float square(float f)
{
    const float squared = f * f;
    return squared;
}
// Reverses the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA).
static inline uint32_t byteswap_32(uint32_t v)
{
    const uint32_t byte0 = v & 0xFF;
    const uint32_t byte1 = (v >> 8) & 0xFF;
    const uint32_t byte2 = (v >> 16) & 0xFF;
    const uint32_t byte3 = (v >> 24) & 0xFF;
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
// Running accumulator of count, sum and sum of squares for unsigned samples,
// from which mean, standard deviation and variance are derived.
class tracked_stat
{
public:
    tracked_stat() { clear(); }

    // Forgets all samples.
    inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; }

    // Adds one sample.
    inline void update(uint32_t val)
    {
        m_num++;
        m_total += val;
        // Widen before multiplying: val * val would wrap in 32 bits for val > 65535.
        m_total2 += (uint64_t)val * val;
    }

    inline tracked_stat& operator += (uint32_t val) { update(val); return *this; }

    inline uint32_t get_number_of_values() const { return m_num; }
    inline uint64_t get_total() const { return m_total; }
    inline uint64_t get_total2() const { return m_total2; }

    // Mean of the samples, or 0 when there are none.
    inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }

    // Population standard deviation, computed as sqrt(n * sum(x^2) - sum(x)^2) / n; 0 when empty.
    inline float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; }

    inline float get_variance() const { float s = get_std_dev(); return s * s; }

private:
    uint32_t m_num;
    uint64_t m_total;
    uint64_t m_total2;
};
// Oklab colour: lightness L plus the two opponent axes a and b.
struct Lab
{
    float L;
    float a;
    float b;
};

// Linear-light RGB triple.
struct RGB
{
    float r;
    float g;
    float b;
};

// Converts a linear-light sRGB colour to Oklab: a 3x3 matrix into an LMS-like
// space, a cube root per component, then a second 3x3 matrix.
static inline Lab linear_srgb_to_oklab(RGB c)
{
    const float lin_l = 0.4122214708f * c.r + 0.5363325363f * c.g + 0.0514459929f * c.b;
    const float lin_m = 0.2119034982f * c.r + 0.6806995451f * c.g + 0.1073969566f * c.b;
    const float lin_s = 0.0883024619f * c.r + 0.2817188376f * c.g + 0.6299787005f * c.b;

    const float root_l = std::cbrt(lin_l);
    const float root_m = std::cbrt(lin_m);
    const float root_s = std::cbrt(lin_s);

    Lab result;
    result.L = 0.2104542553f * root_l + 0.7936177850f * root_m - 0.0040720468f * root_s;
    result.a = 1.9779984951f * root_l - 2.4285922050f * root_m + 0.4505937099f * root_s;
    result.b = 0.0259040371f * root_l + 0.7827717662f * root_m - 0.8086757660f * root_s;
    return result;
}
// Lookup table from an 8-bit sRGB code value to linear light; filled by init_srgb_to_linear().
static float g_srgb_to_linear[256];

// sRGB decoding curve: linear segment at or below 0.04045, 2.4 power law above it.
// Takes and returns values normalised to [0, 1].
static float f_inv(float x)
{
    return (x <= 0.04045f) ? (x / 12.92f) : powf(((x + 0.055f) / 1.055f), 2.4f);
}

// Fills g_srgb_to_linear for all 256 code values.
static void init_srgb_to_linear()
{
    uint32_t code = 0;
    for (float& entry : g_srgb_to_linear)
    {
        entry = f_inv(code / 255.0f);
        ++code;
    }
}
#pragma pack(push, 1)
// One Oklab table entry: each component quantised to 16 bits, packed to 6 bytes.
struct Lab16
{
    uint16_t m_L;
    uint16_t m_a;
    uint16_t m_b;
};
#pragma pack(pop)
// Table of quantised Oklab values for every 24-bit sRGB colour, indexed by
// r + g * 256 + b * 65536. Sized and filled by init_oklab_table().
std::vector<Lab16> g_srgb_to_oklab16;
// Dequantisation factors used by srgb_to_oklab(): value * SCALE + OFS.
// srgb_to_oklab_norm() applies SCALE_L to all three components instead.
const float SCALE_L = 1.0f / 65535.0f;
const float SCALE_A = (1.0f / 65535.0f) * (0.276216f - (-0.233887f));
const float OFS_A = -0.233887f;
const float SCALE_B = (1.0f / 65535.0f) * (0.198570f - (-0.311528f));
const float OFS_B = -0.311528f;
// Component ranges used by init_oklab_table() when quantising to 16 bits.
// NOTE(review): MIN_A/MAX_A/MIN_B differ from the literals in SCALE_A/OFS_A/SCALE_B/OFS_B
// by 1e-6, so srgb_to_oklab() does not exactly invert the quantisation - confirm whether
// this is intentional.
const float MIN_L = 0.000000f, MAX_L = 1.000000f;
const float MIN_A = -0.233888f, MAX_A = 0.276217f;
const float MIN_B = -0.311529f, MAX_B = 0.198570f;
// Looks up the Oklab value of an sRGB colour and dequantises it using the
// SCALE_*/OFS_* constants. Alpha is ignored. Requires init_oklab_table() to have run.
static inline Lab srgb_to_oklab(const color_rgba &c)
{
    const uint32_t table_index = c.r + c.g * 256 + c.b * 65536;
    const Lab16 &quantised = g_srgb_to_oklab16[table_index];

    Lab lab;
    lab.L = quantised.m_L * SCALE_L;
    lab.a = quantised.m_a * SCALE_A + OFS_A;
    lab.b = quantised.m_b * SCALE_B + OFS_B;
    return lab;
}
// Looks up the Oklab value of an sRGB colour and returns each component rescaled
// from its 16-bit quantised form to [0, 1] (all three use SCALE_L, so a and b are
// normalised rather than in true Oklab units). Alpha is ignored.
// Requires init_oklab_table() to have run.
static inline Lab srgb_to_oklab_norm(const color_rgba& c)
{
    const uint32_t table_index = c.r + c.g * 256 + c.b * 65536;
    const Lab16& quantised = g_srgb_to_oklab16[table_index];

    Lab normalised;
    normalised.L = quantised.m_L * SCALE_L;
    normalised.a = quantised.m_a * SCALE_L;
    normalised.b = quantised.m_b * SCALE_L;
    return normalised;
}
// Builds g_srgb_to_oklab16: for every 24-bit sRGB colour, the Oklab value quantised
// to 16 bits per component over [MIN_*, MAX_*]. init_srgb_to_linear() must have been
// called first, because the conversion reads g_srgb_to_linear.
// The three parameters are not used by this implementation.
static void init_oklab_table(const char *pExec, bool quiet, bool caching_enabled)
{
    (void)pExec;
    (void)quiet;
    (void)caching_enabled;

    g_srgb_to_oklab16.resize(256 * 256 * 256);

    // b is the slowest-varying part of the index (r + g * 256 + b * 65536), so iterate
    // it outermost and r innermost to write the table sequentially.
    for (uint32_t b = 0; b <= 255; b++)
    {
        for (uint32_t g = 0; g <= 255; g++)
        {
            for (uint32_t r = 0; r <= 255; r++)
            {
                Lab l(linear_srgb_to_oklab({ g_srgb_to_linear[r], g_srgb_to_linear[g], g_srgb_to_linear[b] }));

                assert(l.L >= MIN_L && l.L <= MAX_L);
                assert(l.a >= MIN_A && l.a <= MAX_A);
                assert(l.b >= MIN_B && l.b <= MAX_B);

                float lL = std::round(((l.L - MIN_L) / (MAX_L - MIN_L)) * 65535.0f);
                float la = std::round(((l.a - MIN_A) / (MAX_A - MIN_A)) * 65535.0f);
                float lb = std::round(((l.b - MIN_B) / (MAX_B - MIN_B)) * 65535.0f);

                // Guard the narrowing casts below against rounding just outside the range.
                lL = clamp(lL, 0.0f, 65535.0f);
                la = clamp(la, 0.0f, 65535.0f);
                lb = clamp(lb, 0.0f, 65535.0f);

                Lab16& v = g_srgb_to_oklab16[r + g * 256 + b * 65536];
                v.m_L = (uint16_t)lL;
                v.m_a = (uint16_t)la;
                v.m_b = (uint16_t)lb;
            }
        }
    }
}
static inline float compute_se(const color_rgba& a, const color_rgba& orig, uint32_t num_comps, const rdo_png_params &params)
{
float dist;
if (params.m_perceptual_error)
{
Lab la = srgb_to_oklab_norm(a);
Lab lb = srgb_to_oklab_norm(orig);
la.L -= lb.L;
la.a -= lb.a;
la.b -= lb.b;
float L_d = la.L * la.L;
float a_d = la.a * la.a;
float b_d = la.b * la.b;
L_d *= params.m_chan_weights_lab[0];
a_d *= params.m_chan_weights_lab[1];
b_d *= params.m_chan_weights_lab[2];
dist = L_d + a_d + b_d;
// TODO: Scales the error to bring it into a range where lambda will be roughly comparable to plain MSE.
const float NORM_ERROR_SCALE = 350000.0f;
dist *= NORM_ERROR_SCALE;
if (num_comps == 4)
{
int da = (int)a[3] - (int)orig[3];
dist += params.m_chan_weights_lab[3] * square((float)da);
}
}
else if (params.m_use_chan_weights)
{
int dr = (int)a[0] - (int)orig[0];
int dg = (int)a[1] - (int)orig[1];
int db = (int)a[2] - (int)orig[2];
uint32_t idist = (uint32_t)(params.m_chan_weights[0] * (uint32_t)(dr * dr) + params.m_chan_weights[1] * (uint32_t)(dg * dg) + params.m_chan_weights[2] * (uint32_t)(db * db));
if (num_comps == 4)
{
int da = (int)a[3] - (int)orig[3];
idist += params.m_chan_weights[3] * (uint32_t)(da * da);
}
dist = (float)idist;
}
else
{
int dr = (int)a[0] - (int)orig[0];
int dg = (int)a[1] - (int)orig[1];
int db = (int)a[2] - (int)orig[2];
uint32_t idist = (uint32_t)(dr * dr + dg * dg + db * db);
if (num_comps == 4)
{
int da = (int)a[3] - (int)orig[3];
idist += da * da;
}
dist = (float)idist;
}
return dist;
}
static inline bool should_reject(const color_rgba& trial_color, const color_rgba& orig_color, uint32_t num_comps, const rdo_png_params& params)
{
if ((params.m_transparent_reject_test) && (num_comps == 4))
{
if ((orig_color[3] == 0) && (trial_color[3] > 0))
return true;
if ((orig_color[3] == 255) && (trial_color[3] < 255))
return true;
}
if (params.m_use_reject_thresholds)
{
if (params.m_perceptual_error)
{
Lab t(srgb_to_oklab_norm(trial_color));
Lab o(srgb_to_oklab_norm(orig_color));
float L_diff = fabs(t.L - o.L);
if (L_diff > params.m_reject_thresholds_lab[0])
return true;
float ab_dist = square(t.a - o.a) + square(t.b - o.b);
if (ab_dist > (params.m_reject_thresholds_lab[1] * params.m_reject_thresholds_lab[1]))
return true;
if (num_comps == 4)
{
uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]);
if (delta_a > params.m_reject_thresholds[3])
return true;
}
}
else
{
uint32_t delta_r = abs((int)trial_color[0] - (int)orig_color[0]);
uint32_t delta_g = abs((int)trial_color[1] - (int)orig_color[1]);
uint32_t delta_b = abs((int)trial_color[2] - (int)orig_color[2]);
if (delta_r > params.m_reject_thresholds[0])
return true;
if (delta_g > params.m_reject_thresholds[1])
return true;
if (delta_b > params.m_reject_thresholds[2])
return true;
if (num_comps == 4)
{
uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]);
if (delta_a > params.m_reject_thresholds[3])
return true;
}
}
}
return false;
}
// Options for create_smooth_maps(). The std-dev limits use the generic defaults,
// the MSE scales use the QOI-specific ones.
struct smooth_desc
{
    // When true (and the image has alpha), alpha edges also raise the MSE scale.
    bool alpha_is_opacity = true;

    float max_smooth_std_dev = DEF_MAX_SMOOTH_STD_DEV;
    float smooth_max_mse_scale = QOI_DEF_SMOOTH_MAX_MSE_SCALE;

    float max_ultra_smooth_std_dev = DEF_MAX_ULTRA_SMOOTH_STD_DEV;
    float ultra_smooth_max_mse_scale = QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE;
};
// Fills smooth_block_mse_scales with a per-pixel multiplier for the distortion term.
// Pixels in smooth neighbourhoods (low per-channel standard deviation) get a large
// multiplier so the encoder is less willing to introduce error there.
//
//  smooth_block_mse_scales  output map, written at every (x, y) of the image;
//                           presumably already sized width x height by the caller.
//  orig_img                 source image; reads outside it go through get_clamped().
//  desc                     thresholds and maximum scales.
//
// The debug visualisation images of the original implementation were never saved
// (their output code was compiled out), so they are no longer built.
static void create_smooth_maps(
    vector2D<float> &smooth_block_mse_scales,
    const image& orig_img,
    const smooth_desc& desc
) {
    const uint32_t width = orig_img.get_width();
    const uint32_t height = orig_img.get_height();
    const bool has_alpha = orig_img.has_alpha();
    const uint32_t num_comps = has_alpha ? 4 : 3;

    // Largest per-channel standard deviation over the window [first, last] x [first, last]
    // (offsets relative to the pixel, both ends inclusive).
    const auto max_channel_std_dev = [&](uint32_t px, uint32_t py, int first, int last) -> float
    {
        tracked_stat comp_stats[4];
        for (int yd = first; yd <= last; yd++)
        {
            for (int xd = first; xd <= last; xd++)
            {
                const color_rgba& p = orig_img.get_clamped((int)px + xd, (int)py + yd);
                comp_stats[0].update(p[0]);
                comp_stats[1].update(p[1]);
                comp_stats[2].update(p[2]);
                if (num_comps == 4)
                    comp_stats[3].update(p[3]);
            }
        }

        float max_std_dev = 0.0f;
        for (uint32_t i = 0; i < num_comps; i++)
            max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev());
        return max_std_dev;
    };

    for (uint32_t y = 0; y < height; y++)
    {
        for (uint32_t x = 0; x < width; x++)
        {
            // Alpha-edge strength from a 7x7 window of the alpha channel.
            float alpha_edge_yl = 0.0f;
            if ((num_comps == 4) && (desc.alpha_is_opacity))
            {
                tracked_stat alpha_comp_stats;
                for (int yd = -3; yd <= 3; yd++)
                {
                    for (int xd = -3; xd <= 3; xd++)
                    {
                        const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);
                        alpha_comp_stats.update(p[3]);
                    }
                }

                const float alpha_std_dev = alpha_comp_stats.get_std_dev();
                const float yl = clamp(alpha_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f);
                alpha_edge_yl = yl * yl;
            }

            // "Smooth" scale from a 3x3 window: maximum scale at zero deviation, 1 at the limit.
            float scale;
            {
                float yl = clamp(max_channel_std_dev(x, y, -1, 1) / desc.max_smooth_std_dev, 0.0f, 1.0f);
                yl = yl * yl;
                scale = lerp(desc.smooth_max_mse_scale, 1.0f, yl);

                // Near alpha edges, push the scale back up towards the maximum.
                if (num_comps == 4)
                    scale = lerp(scale, desc.smooth_max_mse_scale, alpha_edge_yl);
            }

            // "Ultra smooth" override from a 10x10 window. The window covers offsets -5..4,
            // i.e. it is not centred on the pixel; this matches the original loop bounds.
            {
                const int S = 5;
                float yl = clamp(max_channel_std_dev(x, y, -S, S - 1) / desc.max_ultra_smooth_std_dev, 0.0f, 1.0f);
                yl = powf(yl, 3.0f);
                scale = lerp(desc.ultra_smooth_max_mse_scale, scale, yl);
            }

            smooth_block_mse_scales(x, y) = scale;
        }
    }
}
#pragma pack(push, 1)
// 14-byte QOI file header, laid out exactly as it is written to the output.
struct qoi_header
{
    // Magic bytes "qoif".
    char magic[4];
    // Image width in pixels, stored big-endian.
    uint32_t width;
    // Image height in pixels, stored big-endian.
    uint32_t height;
    // 3 = RGB, 4 = RGBA.
    uint8_t channels;
    // 0 = sRGB with linear alpha, 1 = all channels linear.
    uint8_t colorspace;
};
#pragma pack(pop)
static bool encode_rdo_qoi(
const image& orig_img,
std::vector<uint8_t>& data,
//const rdo_png_params& params,
const vector2D<float>& smooth_block_mse_scales,
float lambda)
{
// This function wasn't designed to deal with lambda=0, so nudge it up.
lambda = max(lambda, .0000125f);
const rdo_png_params params{};
const bool has_alpha = orig_img.has_alpha();
uint32_t num_comps = has_alpha ? 4 : 3;
color_rgba hash[64];
//clear_obj(hash);
memset(&hash, 0, sizeof(hash));
data.resize(0);
qoi_header hdr;
memcpy(hdr.magic, "qoif", 4);
hdr.width = byteswap_32(orig_img.get_width());
hdr.height = byteswap_32(orig_img.get_height());
hdr.channels = has_alpha ? 4 : 3;
hdr.colorspace = 0;
data.resize(sizeof(hdr));
memcpy(data.data(), &hdr, sizeof(hdr));
int prev_r = 0, prev_g = 0, prev_b = 0, prev_a = 255;
uint32_t cur_run_len = 0;
enum commands_t
{
cRUN,
cIDX,
cDELTA,
cLUMA,
cRGB,
cRGBA,
};
uint32_t total_run = 0, total_rgb = 0, total_rgba = 0, total_index = 0, total_delta = 0, total_luma = 0, total_run_pixels = 0;
for (uint32_t y = 0; y < orig_img.get_height(); y++)
{
for (uint32_t x = 0; x < orig_img.get_width(); x++)
{
const color_rgba& c = orig_img(x, y);
const float mse_scale = smooth_block_mse_scales(x, y);
float best_mse = 0.0f;
float best_bits = 40.0f;
float best_t = best_mse + best_bits * lambda;
int best_command = cRGBA;
int best_index = 0, best_dr = 0, best_dg = 0, best_db = 0;
{
color_rgba trial_c(c.r, c.g, c.b, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = 32.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cRGB;
}
}
}
{
color_rgba trial_c(prev_r, prev_g, prev_b, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = cur_run_len ? 0 : 8.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cRUN;
if (best_mse == 0.0f)
{
cur_run_len++;
if (cur_run_len == 62)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a);
continue;
}
}
}
}
if (8.0f * lambda < best_t)
{
uint32_t hash_idx = (c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63;
// First try the INDEX command losslessly.
if (c == hash[hash_idx])
{
float bits = 8.0f;
float trial_t = bits * lambda;
assert(trial_t < best_t);
best_mse = 0.0f;
best_bits = bits;
best_t = trial_t;
best_command = cIDX;
best_index = hash_idx;
}
else
{
// Try a lossy INDEX command.
for (uint32_t i = 0; i < 64; i++)
{
if (!should_reject(hash[i], c, 4, params))
{
float mse = compute_se(hash[i], c, 4, params);
float bits = 8.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cIDX;
best_index = i;
}
}
}
}
}
if (8.0f * lambda < best_t)
{
bool delta_encodable_losslessly = false;
// First try the DELTA command losslessly.
if (c.a == prev_a)
{
int dr = ((int)c.r - prev_r + 2) & 255;
int dg = ((int)c.g - prev_g + 2) & 255;
int db = ((int)c.b - prev_b + 2) & 255;
if ((dr <= 3) && (dg <= 3) && (db <= 3))
{
delta_encodable_losslessly = true;
float bits = 8.0f;
float trial_t = bits * lambda;
assert(trial_t < best_t);
best_mse = 0.0f;
best_bits = bits;
best_t = trial_t;
best_command = cDELTA;
best_dr = dr - 2;
best_dg = dg - 2;
best_db = db - 2;
}
}
// Try a lossy DELTA command.
if (!delta_encodable_losslessly)
{
for (uint32_t i = 0; i < 64; i++)
{
int dr = ((i >> 4) & 3) - 2;
int dg = ((i >> 2) & 3) - 2;
int db = (i & 3) - 2;
color_rgba trial_c((prev_r + dr) & 255, (prev_g + dg) & 255, (prev_b + db) & 255, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = 8.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cDELTA;
best_dr = dr;
best_dg = dg;
best_db = db;
}
}
}
}
}
if (16.0f * lambda < best_t)
{
bool luma_encodable_losslessly_in_rgb = false;
// First try the LUMA command losslessly in RGB (may not be lossy in alpha).
{
int g_diff = (int)c.g - prev_g;
int dg = (g_diff + 32) & 255;
int dr = (((int)c.r - prev_r) - g_diff + 8) & 255;
int db = (((int)c.b - prev_b) - g_diff + 8) & 255;
if ((dg <= 63) && (dr <= 15) && (db <= 15))
{
luma_encodable_losslessly_in_rgb = true;
color_rgba trial_c(c.r, c.g, c.b, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = 16.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cLUMA;
best_dr = dr - 8;
best_dg = dg - 32;
best_db = db - 8;
}
}
}
}
// If we can't use it losslessly, try it lossy.
if ((!luma_encodable_losslessly_in_rgb) && (params.m_speed_mode != cFastestSpeed))
{
if (params.m_speed_mode == cNormalSpeed)
{
// Search all encodable LUMA commands.
for (uint32_t i = 0; i < 16384; i++)
{
int dr = ((i >> 6) & 15) - 8;
int dg = (i & 63) - 32;
int db = ((i >> 10) & 15) - 8;
color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = 16.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cLUMA;
best_dr = dr;
best_dg = dg;
best_db = db;
}
}
}
}
else
{
// TODO: This isn't very smart. What if the G delta is encodable but R and/or B aren't?
const int g_deltas[] = { -24, -16, -14, -12, -10, -8, -6, -4, -3, -2, -1, 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 24 };
const int TOTAL_G_DELTAS = sizeof(g_deltas) / sizeof(g_deltas[0]);
for (int kg = 0; kg < TOTAL_G_DELTAS; kg++)
{
const int dg = g_deltas[kg];
for (uint32_t i = 0; i < 256; i++)
{
int dr = (i & 15) - 8;
int db = ((i >> 4) & 15) - 8;
color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a);
if (!should_reject(trial_c, c, 4, params))
{
float mse = compute_se(trial_c, c, 4, params);
float bits = 16.0f;
float trial_t = mse_scale * mse + bits * lambda;
if (trial_t < best_t)
{
best_mse = mse;
best_bits = bits;
best_t = trial_t;
best_command = cLUMA;
best_dr = dr;
best_dg = dg;
best_db = db;
}
}
}
}
}
}
}
switch (best_command)
{
case cRUN:
{
cur_run_len++;
if (cur_run_len == 62)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a);
break;
}
case cRGB:
{
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
data.push_back(254);
data.push_back((uint8_t)c.r);
data.push_back((uint8_t)c.g);
data.push_back((uint8_t)c.b);
hash[(c.r * 3 + c.g * 5 + c.b * 7 + prev_a * 11) & 63].set(c.r, c.g, c.b, prev_a);
prev_r = c.r;
prev_g = c.g;
prev_b = c.b;
total_rgb++;
break;
}
case cRGBA:
{
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
data.push_back(255);
data.push_back((uint8_t)c.r);
data.push_back((uint8_t)c.g);
data.push_back((uint8_t)c.b);
data.push_back((uint8_t)c.a);
hash[(c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63] = c;
prev_r = c.r;
prev_g = c.g;
prev_b = c.b;
prev_a = c.a;
total_rgba++;
break;
}
case cIDX:
{
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
data.push_back(best_index);
prev_r = hash[best_index].r;
prev_g = hash[best_index].g;
prev_b = hash[best_index].b;
prev_a = hash[best_index].a;
total_index++;
break;
}
case cDELTA:
{
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
assert(best_dr >= -2 && best_dr <= 1);
assert(best_dg >= -2 && best_dg <= 1);
assert(best_db >= -2 && best_db <= 1);
data.push_back(64 + ((best_dr + 2) << 4) + ((best_dg + 2) << 2) + (best_db + 2));
uint32_t decoded_r = (prev_r + best_dr) & 0xFF;
uint32_t decoded_g = (prev_g + best_dg) & 0xFF;
uint32_t decoded_b = (prev_b + best_db) & 0xFF;
uint32_t decoded_a = prev_a;
hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a);
prev_r = decoded_r;
prev_g = decoded_g;
prev_b = decoded_b;
prev_a = decoded_a;
total_delta++;
break;
}
case cLUMA:
{
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back(0xC0 | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
assert(best_dr >= -8 && best_dr <= 7);
assert(best_dg >= -32 && best_dg <= 31);
assert(best_db >= -8 && best_db <= 7);
data.push_back((uint8_t)(128 + (best_dg + 32)));
data.push_back((uint8_t)(((best_dr + 8) << 4) | (best_db + 8)));
uint32_t decoded_r = (prev_r + best_dr + best_dg) & 0xFF;
uint32_t decoded_g = (prev_g + best_dg) & 0xFF;
uint32_t decoded_b = (prev_b + best_db + best_dg) & 0xFF;
uint32_t decoded_a = prev_a;
hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a);
prev_r = decoded_r;
prev_g = decoded_g;
prev_b = decoded_b;
prev_a = decoded_a;
total_luma++;
break;
}
default:
{
assert(0);
break;
}
}
}
}
if (cur_run_len)
{
total_run_pixels += cur_run_len;
data.push_back((64 + 128) | (cur_run_len - 1));
cur_run_len = 0;
total_run++;
}
// end padding
for (uint32_t i = 0; i < 7; i++) {
data.push_back(0);
}
data.push_back(1);
if (params.m_print_stats)
{
printf("Totals: Run: %u, Run Pixels: %u %3.2f%%, RGB: %u %3.2f%%, RGBA: %u %3.2f%%, INDEX: %u %3.2f%%, DELTA: %u %3.2f%%, LUMA: %u %3.2f%%\n\n",
total_run,
total_run_pixels, (total_run_pixels * 100.0f) / orig_img.get_total_pixels(),
total_rgb, (total_rgb * 100.0f) / orig_img.get_total_pixels(),
total_rgba, (total_rgba * 100.0f) / orig_img.get_total_pixels(),
total_index, (total_index * 100.0f) / orig_img.get_total_pixels(),
total_delta, (total_delta * 100.0f) / orig_img.get_total_pixels(),
total_luma, (total_luma * 100.0f) / orig_img.get_total_pixels());
}
return true;
}
// True once the lookup tables have been built by init_qoi_rdo().
static bool g_init = false;

// Builds the sRGB->linear and sRGB->Oklab lookup tables.
// Returns false (and does nothing) if they were already built, true otherwise.
bool init_qoi_rdo(void) {
    if (!g_init) {
        // Order matters: the Oklab table is derived from the linear table.
        init_srgb_to_linear();
        init_oklab_table("", true, false);
        g_init = true;
        return true;
    }
    return false;
}
bool quit_qoi_rdo(void) {
if (!g_init) {
return false;
}
g_srgb_to_oklab16.clear();
return true;
}
// Maps a quality setting to the encoder's lambda.
// quality is clamped to [1, 100]; higher quality gives a smaller lambda, with
// quality 100 mapping to 0. The curve follows the cube root of quality / 100.
static float lambda_from_quality(int quality) {
    const int clamped_quality = clamp(quality, 1, 100);

    // TODO: more stuff and log scale
    // Previously tried mappings:
    //   lerp(50000, 100, quality/100.f)
    //   lerp(250'000, 0, sqrtf(quality/100.f))
    //   lerp(1'000'000, 0, sqrtf(quality/100.f))
    //   lerp(1'000'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f))
    //   lerp(250'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f))
    const float blend = cbrtf(clamped_quality/100.f);
    return lerp(250'000, 0, blend);
}
std::vector<uint8_t> encode_qoi_rdo_simple(const uint8_t* data, const qoi_rdo_desc& desc, int quality) {
if (!g_init) {
return {};
}
const float lambda = lambda_from_quality(quality);
vector2D<float> smooth_block_mse_scales(desc.width, desc.height);
image orig_img(data, desc.width, desc.height, desc.channels);
if (false /* m_no_mse_scaling */) {
smooth_block_mse_scales.set_all(1.0f);
} else {
create_smooth_maps(
smooth_block_mse_scales,
orig_img,
{} // smooth_desc
);
}
std::vector<uint8_t> output_data;
if (!encode_rdo_qoi(
orig_img,
output_data,
smooth_block_mse_scales,
lambda))
{
return {};
}
return output_data;
}