1213 lines
30 KiB
C++
1213 lines
30 KiB
C++
// qoirdo.cpp
|
|
// Copyright (C) 2022 Richard Geldreich, Jr. All Rights Reserved.
|
|
// Copyright (C) 2025 Erik Scholz
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "./qoirdo.hpp"
|
|
|
|
#if _MSC_VER
|
|
// For sprintf(), strcpy()
|
|
#define _CRT_SECURE_NO_WARNINGS (1)
|
|
#endif
|
|
|
|
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
|
|
|
|
#include "./basisu.min.hpp"
|
|
|
|
using namespace basisu;
|
|
|
|
#define RDO_PNG_VERSION "v1.10"

// Default smooth / ultra-smooth map tuning; rdo_png_params::clear() starts from these.
constexpr float DEF_MAX_SMOOTH_STD_DEV = 35.0f;
constexpr float DEF_SMOOTH_MAX_MSE_SCALE = 250.0f;
constexpr float DEF_MAX_ULTRA_SMOOTH_STD_DEV = 5.0F;
constexpr float DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 1500.0F;

// QOI-specific MSE scale defaults; smooth_desc uses these instead of the two scales above.
constexpr float QOI_DEF_SMOOTH_MAX_MSE_SCALE = 2500.0f;
constexpr float QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE = 5000.0f;
|
|
|
|
// Encoder effort presets. In encode_rdo_qoi() they only affect the lossy LUMA
// search: cNormalSpeed tries every encodable LUMA command, cFasterSpeed tries a
// reduced set of green deltas, cFastestSpeed skips the lossy LUMA search.
enum speed_mode
{
	cNormalSpeed = 0,
	cFasterSpeed = 1,
	cFastestSpeed = 2
};
|
|
|
|
struct rdo_png_params
|
|
{
|
|
rdo_png_params()
|
|
{
|
|
clear();
|
|
}
|
|
|
|
void clear()
|
|
{
|
|
m_orig_img.clear();
|
|
m_output_file_data.clear();
|
|
m_lambda = 300.0f;
|
|
m_level = 0;
|
|
m_psnr = 0;
|
|
m_angular_rms_error = 0;
|
|
m_y_psnr = 0;
|
|
m_bpp = 0;
|
|
m_print_debug_output = false;
|
|
m_debug_images = false;
|
|
m_print_progress = false;
|
|
m_print_stats = false;
|
|
|
|
m_use_chan_weights = false;
|
|
m_chan_weights[0] = 1;
|
|
m_chan_weights[1] = 1;
|
|
m_chan_weights[2] = 1;
|
|
m_chan_weights[3] = 1;
|
|
|
|
{
|
|
float LW = 2;
|
|
float AW = 1.5;
|
|
float BW = 1;
|
|
float l = sqrtf(LW * LW + AW * AW + BW * BW);
|
|
LW /= l;
|
|
AW /= l;
|
|
BW /= l;
|
|
m_chan_weights_lab[0] = LW; // L
|
|
m_chan_weights_lab[1] = AW; // a
|
|
m_chan_weights_lab[2] = BW; // b
|
|
m_chan_weights_lab[3] = 1.5f; // alpha
|
|
}
|
|
|
|
m_use_reject_thresholds = true;
|
|
m_reject_thresholds[0] = 32;
|
|
m_reject_thresholds[1] = 32;
|
|
m_reject_thresholds[2] = 32;
|
|
m_reject_thresholds[3] = 32;
|
|
|
|
m_reject_thresholds_lab[0] = .05f;
|
|
//m_reject_thresholds_lab[1] = .075f;
|
|
m_reject_thresholds_lab[1] = .05f;
|
|
|
|
m_transparent_reject_test = false;
|
|
|
|
m_perceptual_error = true;
|
|
|
|
m_match_only = false;
|
|
|
|
m_two_pass = false;
|
|
|
|
m_alpha_is_opacity = true;
|
|
|
|
m_speed_mode = cFastestSpeed;
|
|
|
|
m_max_smooth_std_dev = DEF_MAX_SMOOTH_STD_DEV;
|
|
m_smooth_max_mse_scale = DEF_SMOOTH_MAX_MSE_SCALE;
|
|
m_max_ultra_smooth_std_dev = DEF_MAX_ULTRA_SMOOTH_STD_DEV;
|
|
m_ultra_smooth_max_mse_scale = DEF_ULTRA_SMOOTH_MAX_MSE_SCALE;
|
|
|
|
m_no_mse_scaling = false;
|
|
}
|
|
|
|
void print()
|
|
{
|
|
printf("orig image: %ux%u has alpha: %u\n", m_orig_img.get_width(), m_orig_img.get_height(), m_orig_img.has_alpha());
|
|
printf("lambda: %f\n", m_lambda);
|
|
printf("level: %u\n", m_level);
|
|
printf("chan weights: %u %u %u %u\n", m_chan_weights[0], m_chan_weights[1], m_chan_weights[2], m_chan_weights[3]);
|
|
printf("use chan weights: %u\n", m_use_chan_weights);
|
|
printf("chan weights lab: %f %f %f %f\n", m_chan_weights_lab[0], m_chan_weights_lab[1], m_chan_weights_lab[2], m_chan_weights_lab[3]);
|
|
printf("reject thresholds: %u %u %u %u\n", m_reject_thresholds[0], m_reject_thresholds[1], m_reject_thresholds[2], m_reject_thresholds[3]);
|
|
printf("reject thresholds lab: %f %f\n", m_reject_thresholds_lab[0], m_reject_thresholds_lab[1]);
|
|
printf("use reject thresholds: %u\n", m_use_reject_thresholds);
|
|
printf("transparent reject test: %u\n", m_transparent_reject_test);
|
|
printf("print debug output: %u\n", m_print_debug_output);
|
|
printf("debug images: %u\n", m_debug_images);
|
|
printf("print progress: %u\n", m_print_progress);
|
|
printf("print stats: %u\n", m_print_stats);
|
|
printf("perceptual error: %u\n", m_perceptual_error);
|
|
printf("match only: %u\n", m_match_only);
|
|
printf("two pass: %u\n", m_two_pass);
|
|
printf("alpha is opacity: %u\n", m_alpha_is_opacity);
|
|
printf("speed mode: %u\n", (uint32_t)m_speed_mode);
|
|
printf("max smooth std dev: %f\n", m_max_smooth_std_dev);
|
|
printf("smooth max mse scale: %f\n", m_smooth_max_mse_scale);
|
|
printf("max ultra smooth std dev: %f\n", m_max_ultra_smooth_std_dev);
|
|
printf("ultra smooth max mse scale: %f\n", m_ultra_smooth_max_mse_scale);
|
|
printf("no MSE scaling: %u\n", m_no_mse_scaling);
|
|
}
|
|
|
|
// TODO: results - move
|
|
float m_psnr;
|
|
float m_angular_rms_error;
|
|
float m_y_psnr;
|
|
float m_bpp;
|
|
|
|
// This is the output image data, but note for PNG you can't save it at the right size without the scanline predictor values.
|
|
image m_output_image;
|
|
|
|
image m_orig_img;
|
|
|
|
std::vector<uint8_t> m_output_file_data;
|
|
|
|
float m_lambda;
|
|
|
|
uint32_t m_level;
|
|
|
|
uint32_t m_chan_weights[4];
|
|
float m_chan_weights_lab[4];
|
|
bool m_use_chan_weights;
|
|
|
|
uint32_t m_reject_thresholds[4];
|
|
float m_reject_thresholds_lab[2];
|
|
bool m_use_reject_thresholds;
|
|
|
|
bool m_transparent_reject_test;
|
|
|
|
bool m_print_debug_output;
|
|
bool m_debug_images;
|
|
bool m_print_progress;
|
|
bool m_print_stats;
|
|
|
|
bool m_perceptual_error;
|
|
|
|
bool m_match_only;
|
|
bool m_two_pass;
|
|
|
|
bool m_alpha_is_opacity;
|
|
|
|
speed_mode m_speed_mode;
|
|
|
|
float m_max_smooth_std_dev;
|
|
float m_smooth_max_mse_scale;
|
|
float m_max_ultra_smooth_std_dev;
|
|
float m_ultra_smooth_max_mse_scale;
|
|
|
|
bool m_no_mse_scaling;
|
|
};
|
|
|
|
// Returns f multiplied by itself.
static inline float square(float f)
{
	const float squared = f * f;
	return squared;
}
|
|
|
|
// Reverses the byte order of a 32-bit value (0x11223344 -> 0x44332211).
static inline uint32_t byteswap_32(uint32_t v)
{
	const uint32_t byte0 = v & 0xFF;
	const uint32_t byte1 = (v >> 8) & 0xFF;
	const uint32_t byte2 = (v >> 16) & 0xFF;
	const uint32_t byte3 = (v >> 24) & 0xFF;

	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
|
|
|
|
// Running accumulator over unsigned samples: keeps the count, the sum and the
// sum of squares, from which average / standard deviation / variance are derived.
class tracked_stat
{
public:
	tracked_stat() { clear(); }

	// Forgets all samples.
	inline void clear() { m_num = 0; m_total = 0; m_total2 = 0; }

	// Adds one sample.
	inline void update(uint32_t val)
	{
		m_num++;
		m_total += val;
		// Widen before multiplying: a 32-bit val * val wraps for val > 65535.
		m_total2 += static_cast<uint64_t>(val) * val;
	}

	inline tracked_stat& operator += (uint32_t val) { update(val); return *this; }

	inline uint32_t get_number_of_values() const { return m_num; }
	inline uint64_t get_total() const { return m_total; }
	inline uint64_t get_total2() const { return m_total2; }

	// Mean of the samples, or 0 when empty.
	inline float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }

	// Population standard deviation, computed as sqrt(n * sum(x^2) - sum(x)^2) / n; 0 when empty.
	inline float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; }

	// Square of get_std_dev().
	inline float get_variance() const { float s = get_std_dev(); return s * s; }

private:
	uint32_t m_num;
	uint64_t m_total;
	uint64_t m_total2;
};
|
|
|
|
// Oklab triple.
struct Lab
{
	float L;
	float a;
	float b;
};

// Linear-light RGB triple.
struct RGB
{
	float r;
	float g;
	float b;
};

// Converts a linear sRGB color to Oklab: a 3x3 mix into an LMS-like space,
// a cube root per component, then a second 3x3 mix into L/a/b.
static inline Lab linear_srgb_to_oklab(RGB c)
{
	const float lin_l = 0.4122214708f * c.r + 0.5363325363f * c.g + 0.0514459929f * c.b;
	const float lin_m = 0.2119034982f * c.r + 0.6806995451f * c.g + 0.1073969566f * c.b;
	const float lin_s = 0.0883024619f * c.r + 0.2817188376f * c.g + 0.6299787005f * c.b;

	const float l_ = std::cbrtf(lin_l);
	const float m_ = std::cbrtf(lin_m);
	const float s_ = std::cbrtf(lin_s);

	Lab out;
	out.L = 0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_;
	out.a = 1.9779984951f * l_ - 2.4285922050f * m_ + 0.4505937099f * s_;
	out.b = 0.0259040371f * l_ + 0.7827717662f * m_ - 0.8086757660f * s_;
	return out;
}
|
|
|
|
// Lookup table from an 8-bit sRGB code value to linear light; filled by init_srgb_to_linear().
static float g_srgb_to_linear[256];

// sRGB decoding curve: a linear segment up to 0.04045, a 2.4-exponent power segment above it.
static float f_inv(float x)
{
	return (x <= 0.04045f) ? (x / 12.92f) : powf(((x + 0.055f) / 1.055f), 2.4f);
}

// Fills g_srgb_to_linear for every 8-bit code value.
static void init_srgb_to_linear()
{
	uint32_t code = 0;
	while (code < 256)
	{
		g_srgb_to_linear[code] = f_inv(code / 255.0f);
		++code;
	}
}
|
|
|
|
// One Oklab triple quantized to 16 bits per component (6 bytes, no padding).
#pragma pack(push, 1)
struct Lab16
{
	uint16_t m_L;
	uint16_t m_a;
	uint16_t m_b;
};
#pragma pack(pop)

// Table with one entry per 24-bit sRGB color, indexed r + g * 256 + b * 65536.
// Sized and filled by init_oklab_table().
std::vector<Lab16> g_srgb_to_oklab16;

// Dequantization factors used by srgb_to_oklab() / srgb_to_oklab_norm().
constexpr float SCALE_L = 1.0f / 65535.0f;
constexpr float SCALE_A = (1.0f / 65535.0f) * (0.276216f - (-0.233887f));
constexpr float OFS_A = -0.233887f;
constexpr float SCALE_B = (1.0f / 65535.0f) * (0.198570f - (-0.311528f));
constexpr float OFS_B = -0.311528f;

// Component ranges used by init_oklab_table() when quantizing.
// NOTE(review): the a/b bounds differ from the SCALE_*/OFS_* ranges above in the last digit.
constexpr float MIN_L = 0.000000f;
constexpr float MAX_L = 1.000000f;
constexpr float MIN_A = -0.233888f;
constexpr float MAX_A = 0.276217f;
constexpr float MIN_B = -0.311529f;
constexpr float MAX_B = 0.198570f;
|
|
|
|
// Looks up the quantized Oklab entry for an sRGB color (alpha is ignored) and
// dequantizes it with the per-component scale and offset constants.
// The table must have been filled by init_oklab_table() first.
static inline Lab srgb_to_oklab(const color_rgba &c)
{
	const Lab16 &entry = g_srgb_to_oklab16[c.r + c.g * 256 + c.b * 65536];

	return Lab{
		entry.m_L * SCALE_L,
		entry.m_a * SCALE_A + OFS_A,
		entry.m_b * SCALE_B + OFS_B
	};
}
|
|
|
|
// Like srgb_to_oklab(), but every component is scaled by 1/65535 only, so L, a and b
// each come back in [0, 1] instead of their native Oklab ranges.
// The table must have been filled by init_oklab_table() first.
static inline Lab srgb_to_oklab_norm(const color_rgba& c)
{
	const Lab16& entry = g_srgb_to_oklab16[c.r + c.g * 256 + c.b * 65536];

	return Lab{
		entry.m_L * SCALE_L,
		entry.m_a * SCALE_L,
		entry.m_b * SCALE_L
	};
}
|
|
|
|
static void init_oklab_table(const char *pExec, bool quiet, bool caching_enabled)
|
|
{
|
|
g_srgb_to_oklab16.resize(256 * 256 * 256);
|
|
|
|
for (uint32_t r = 0; r <= 255; r++)
|
|
{
|
|
for (uint32_t g = 0; g <= 255; g++)
|
|
{
|
|
for (uint32_t b = 0; b <= 255; b++)
|
|
{
|
|
color_rgba c(r, g, b, 255);
|
|
Lab l(linear_srgb_to_oklab({ g_srgb_to_linear[c.r], g_srgb_to_linear[c.g], g_srgb_to_linear[c.b] }));
|
|
|
|
assert(l.L >= MIN_L && l.L <= MAX_L);
|
|
assert(l.a >= MIN_A && l.a <= MAX_A);
|
|
assert(l.b >= MIN_B && l.b <= MAX_B);
|
|
|
|
float lL = std::round(((l.L - MIN_L) / (MAX_L - MIN_L)) * 65535.0f);
|
|
float la = std::round(((l.a - MIN_A) / (MAX_A - MIN_A)) * 65535.0f);
|
|
float lb = std::round(((l.b - MIN_B) / (MAX_B - MIN_B)) * 65535.0f);
|
|
|
|
lL = clamp(lL, 0.0f, 65535.0f);
|
|
la = clamp(la, 0.0f, 65535.0f);
|
|
lb = clamp(lb, 0.0f, 65535.0f);
|
|
|
|
Lab16& v = g_srgb_to_oklab16[r + g * 256 + b * 65536];
|
|
v.m_L = (uint16_t)lL;
|
|
v.m_a = (uint16_t)la;
|
|
v.m_b = (uint16_t)lb;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline float compute_se(const color_rgba& a, const color_rgba& orig, uint32_t num_comps, const rdo_png_params ¶ms)
|
|
{
|
|
float dist;
|
|
|
|
if (params.m_perceptual_error)
|
|
{
|
|
Lab la = srgb_to_oklab_norm(a);
|
|
Lab lb = srgb_to_oklab_norm(orig);
|
|
|
|
la.L -= lb.L;
|
|
la.a -= lb.a;
|
|
la.b -= lb.b;
|
|
|
|
float L_d = la.L * la.L;
|
|
float a_d = la.a * la.a;
|
|
float b_d = la.b * la.b;
|
|
|
|
L_d *= params.m_chan_weights_lab[0];
|
|
a_d *= params.m_chan_weights_lab[1];
|
|
b_d *= params.m_chan_weights_lab[2];
|
|
|
|
dist = L_d + a_d + b_d;
|
|
|
|
// TODO: Scales the error to bring it into a range where lambda will be roughly comparable to plain MSE.
|
|
const float NORM_ERROR_SCALE = 350000.0f;
|
|
dist *= NORM_ERROR_SCALE;
|
|
|
|
if (num_comps == 4)
|
|
{
|
|
int da = (int)a[3] - (int)orig[3];
|
|
dist += params.m_chan_weights_lab[3] * square((float)da);
|
|
}
|
|
}
|
|
else if (params.m_use_chan_weights)
|
|
{
|
|
int dr = (int)a[0] - (int)orig[0];
|
|
int dg = (int)a[1] - (int)orig[1];
|
|
int db = (int)a[2] - (int)orig[2];
|
|
|
|
uint32_t idist = (uint32_t)(params.m_chan_weights[0] * (uint32_t)(dr * dr) + params.m_chan_weights[1] * (uint32_t)(dg * dg) + params.m_chan_weights[2] * (uint32_t)(db * db));
|
|
if (num_comps == 4)
|
|
{
|
|
int da = (int)a[3] - (int)orig[3];
|
|
idist += params.m_chan_weights[3] * (uint32_t)(da * da);
|
|
}
|
|
|
|
dist = (float)idist;
|
|
}
|
|
else
|
|
{
|
|
int dr = (int)a[0] - (int)orig[0];
|
|
int dg = (int)a[1] - (int)orig[1];
|
|
int db = (int)a[2] - (int)orig[2];
|
|
|
|
uint32_t idist = (uint32_t)(dr * dr + dg * dg + db * db);
|
|
if (num_comps == 4)
|
|
{
|
|
int da = (int)a[3] - (int)orig[3];
|
|
idist += da * da;
|
|
}
|
|
|
|
dist = (float)idist;
|
|
}
|
|
|
|
return dist;
|
|
}
|
|
|
|
static inline bool should_reject(const color_rgba& trial_color, const color_rgba& orig_color, uint32_t num_comps, const rdo_png_params& params)
|
|
{
|
|
if ((params.m_transparent_reject_test) && (num_comps == 4))
|
|
{
|
|
if ((orig_color[3] == 0) && (trial_color[3] > 0))
|
|
return true;
|
|
|
|
if ((orig_color[3] == 255) && (trial_color[3] < 255))
|
|
return true;
|
|
}
|
|
|
|
if (params.m_use_reject_thresholds)
|
|
{
|
|
if (params.m_perceptual_error)
|
|
{
|
|
Lab t(srgb_to_oklab_norm(trial_color));
|
|
Lab o(srgb_to_oklab_norm(orig_color));
|
|
|
|
float L_diff = fabs(t.L - o.L);
|
|
|
|
if (L_diff > params.m_reject_thresholds_lab[0])
|
|
return true;
|
|
|
|
float ab_dist = square(t.a - o.a) + square(t.b - o.b);
|
|
|
|
if (ab_dist > (params.m_reject_thresholds_lab[1] * params.m_reject_thresholds_lab[1]))
|
|
return true;
|
|
|
|
if (num_comps == 4)
|
|
{
|
|
uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]);
|
|
if (delta_a > params.m_reject_thresholds[3])
|
|
return true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint32_t delta_r = abs((int)trial_color[0] - (int)orig_color[0]);
|
|
uint32_t delta_g = abs((int)trial_color[1] - (int)orig_color[1]);
|
|
uint32_t delta_b = abs((int)trial_color[2] - (int)orig_color[2]);
|
|
|
|
if (delta_r > params.m_reject_thresholds[0])
|
|
return true;
|
|
if (delta_g > params.m_reject_thresholds[1])
|
|
return true;
|
|
if (delta_b > params.m_reject_thresholds[2])
|
|
return true;
|
|
|
|
if (num_comps == 4)
|
|
{
|
|
uint32_t delta_a = abs((int)trial_color[3] - (int)orig_color[3]);
|
|
if (delta_a > params.m_reject_thresholds[3])
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
struct smooth_desc {
|
|
bool alpha_is_opacity {true};
|
|
float max_smooth_std_dev {DEF_MAX_SMOOTH_STD_DEV};
|
|
float smooth_max_mse_scale {QOI_DEF_SMOOTH_MAX_MSE_SCALE};
|
|
float max_ultra_smooth_std_dev {DEF_MAX_ULTRA_SMOOTH_STD_DEV};
|
|
float ultra_smooth_max_mse_scale {QOI_DEF_ULTRA_SMOOTH_MAX_MSE_SCALE};
|
|
};
|
|
|
|
static void create_smooth_maps(
|
|
vector2D<float> &smooth_block_mse_scales,
|
|
const image& orig_img,
|
|
const smooth_desc& desc
|
|
) {
|
|
const uint32_t width = orig_img.get_width();
|
|
const uint32_t height = orig_img.get_height();
|
|
const uint32_t total_pixels = orig_img.get_total_pixels();
|
|
const bool has_alpha = orig_img.has_alpha();
|
|
const uint32_t num_comps = has_alpha ? 4 : 3;
|
|
|
|
#if 0
|
|
if (params.m_no_mse_scaling)
|
|
{
|
|
smooth_block_mse_scales.set_all(1.0f);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
image smooth_vis(width, height);
|
|
image alpha_edge_vis(width, height);
|
|
image ultra_smooth_vis(width, height);
|
|
|
|
for (uint32_t y = 0; y < height; y++)
|
|
{
|
|
for (uint32_t x = 0; x < width; x++)
|
|
{
|
|
float alpha_edge_yl = 0.0f;
|
|
if ((num_comps == 4) && (desc.alpha_is_opacity))
|
|
{
|
|
tracked_stat alpha_comp_stats;
|
|
for (int yd = -3; yd <= 3; yd++)
|
|
{
|
|
for (int xd = -3; xd <= 3; xd++)
|
|
{
|
|
const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);
|
|
alpha_comp_stats.update(p[3]);
|
|
}
|
|
}
|
|
|
|
float max_std_dev = alpha_comp_stats.get_std_dev();
|
|
|
|
float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f);
|
|
alpha_edge_yl = yl * yl;
|
|
}
|
|
|
|
{
|
|
tracked_stat comp_stats[4];
|
|
for (int yd = -1; yd <= 1; yd++)
|
|
{
|
|
for (int xd = -1; xd <= 1; xd++)
|
|
{
|
|
const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);
|
|
comp_stats[0].update(p[0]);
|
|
comp_stats[1].update(p[1]);
|
|
comp_stats[2].update(p[2]);
|
|
if (num_comps == 4)
|
|
comp_stats[3].update(p[3]);
|
|
}
|
|
}
|
|
|
|
float max_std_dev = 0.0f;
|
|
for (uint32_t i = 0; i < num_comps; i++)
|
|
max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev());
|
|
|
|
float yl = clamp(max_std_dev / desc.max_smooth_std_dev, 0.0f, 1.0f);
|
|
yl = yl * yl;
|
|
|
|
smooth_block_mse_scales(x, y) = lerp(desc.smooth_max_mse_scale, 1.0f, yl);
|
|
|
|
if (num_comps == 4)
|
|
{
|
|
alpha_edge_vis(x, y).set((int)std::round(alpha_edge_yl * 255.0f));
|
|
|
|
smooth_block_mse_scales(x, y) = lerp(smooth_block_mse_scales(x, y), desc.smooth_max_mse_scale, alpha_edge_yl);
|
|
}
|
|
|
|
smooth_vis(x, y).set(clamp((int)((smooth_block_mse_scales(x, y) - 1.0f) / (desc.smooth_max_mse_scale - 1.0f) * 255.0f + .5f), 0, 255));
|
|
}
|
|
|
|
{
|
|
tracked_stat comp_stats[4];
|
|
|
|
const int S = 5;
|
|
for (int yd = -S; yd < S; yd++)
|
|
{
|
|
for (int xd = -S; xd < S; xd++)
|
|
{
|
|
const color_rgba& p = orig_img.get_clamped((int)x + xd, (int)y + yd);
|
|
comp_stats[0].update(p[0]);
|
|
comp_stats[1].update(p[1]);
|
|
comp_stats[2].update(p[2]);
|
|
if (num_comps == 4)
|
|
comp_stats[3].update(p[3]);
|
|
}
|
|
}
|
|
|
|
float max_std_dev = 0.0f;
|
|
for (uint32_t i = 0; i < num_comps; i++)
|
|
max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev());
|
|
|
|
float yl = clamp(max_std_dev / desc.max_ultra_smooth_std_dev, 0.0f, 1.0f);
|
|
yl = powf(yl, 3.0f);
|
|
|
|
smooth_block_mse_scales(x, y) = lerp(desc.ultra_smooth_max_mse_scale, smooth_block_mse_scales(x, y), yl);
|
|
|
|
ultra_smooth_vis(x, y).set((int)std::round(yl * 255.0f));
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
if (params.m_debug_images)
|
|
{
|
|
save_png("dbg_smooth_vis.png", smooth_vis);
|
|
save_png("dbg_alpha_edge_vis.png", alpha_edge_vis);
|
|
save_png("dbg_ultra_smooth_vis.png", ultra_smooth_vis);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// On-disk QOI file header, packed to 14 bytes with no padding.
#pragma pack(push, 1)
struct qoi_header
{
	char magic[4];       // magic bytes "qoif"
	uint32_t width;      // image width in pixels (BE)
	uint32_t height;     // image height in pixels (BE)
	uint8_t channels;    // 3 = RGB, 4 = RGBA
	uint8_t colorspace;  // 0 = sRGB with linear alpha 1 = all channels linear
};
#pragma pack(pop)
|
|
|
|
static bool encode_rdo_qoi(
|
|
const image& orig_img,
|
|
std::vector<uint8_t>& data,
|
|
//const rdo_png_params& params,
|
|
const vector2D<float>& smooth_block_mse_scales,
|
|
float lambda)
|
|
{
|
|
// This function wasn't designed to deal with lambda=0, so nudge it up.
|
|
lambda = max(lambda, .0000125f);
|
|
|
|
const rdo_png_params params{};
|
|
|
|
const bool has_alpha = orig_img.has_alpha();
|
|
uint32_t num_comps = has_alpha ? 4 : 3;
|
|
|
|
color_rgba hash[64];
|
|
//clear_obj(hash);
|
|
memset(&hash, 0, sizeof(hash));
|
|
|
|
data.resize(0);
|
|
|
|
qoi_header hdr;
|
|
memcpy(hdr.magic, "qoif", 4);
|
|
hdr.width = byteswap_32(orig_img.get_width());
|
|
hdr.height = byteswap_32(orig_img.get_height());
|
|
hdr.channels = has_alpha ? 4 : 3;
|
|
hdr.colorspace = 0;
|
|
data.resize(sizeof(hdr));
|
|
memcpy(data.data(), &hdr, sizeof(hdr));
|
|
|
|
int prev_r = 0, prev_g = 0, prev_b = 0, prev_a = 255;
|
|
uint32_t cur_run_len = 0;
|
|
|
|
enum commands_t
|
|
{
|
|
cRUN,
|
|
cIDX,
|
|
cDELTA,
|
|
cLUMA,
|
|
cRGB,
|
|
cRGBA,
|
|
};
|
|
|
|
uint32_t total_run = 0, total_rgb = 0, total_rgba = 0, total_index = 0, total_delta = 0, total_luma = 0, total_run_pixels = 0;
|
|
|
|
for (uint32_t y = 0; y < orig_img.get_height(); y++)
|
|
{
|
|
for (uint32_t x = 0; x < orig_img.get_width(); x++)
|
|
{
|
|
const color_rgba& c = orig_img(x, y);
|
|
const float mse_scale = smooth_block_mse_scales(x, y);
|
|
|
|
float best_mse = 0.0f;
|
|
float best_bits = 40.0f;
|
|
float best_t = best_mse + best_bits * lambda;
|
|
int best_command = cRGBA;
|
|
int best_index = 0, best_dr = 0, best_dg = 0, best_db = 0;
|
|
|
|
{
|
|
color_rgba trial_c(c.r, c.g, c.b, prev_a);
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = 32.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cRGB;
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
color_rgba trial_c(prev_r, prev_g, prev_b, prev_a);
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = cur_run_len ? 0 : 8.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cRUN;
|
|
|
|
if (best_mse == 0.0f)
|
|
{
|
|
cur_run_len++;
|
|
if (cur_run_len == 62)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a);
|
|
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (8.0f * lambda < best_t)
|
|
{
|
|
uint32_t hash_idx = (c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63;
|
|
|
|
// First try the INDEX command losslessly.
|
|
if (c == hash[hash_idx])
|
|
{
|
|
float bits = 8.0f;
|
|
float trial_t = bits * lambda;
|
|
|
|
assert(trial_t < best_t);
|
|
|
|
best_mse = 0.0f;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cIDX;
|
|
best_index = hash_idx;
|
|
}
|
|
else
|
|
{
|
|
// Try a lossy INDEX command.
|
|
for (uint32_t i = 0; i < 64; i++)
|
|
{
|
|
if (!should_reject(hash[i], c, 4, params))
|
|
{
|
|
float mse = compute_se(hash[i], c, 4, params);
|
|
float bits = 8.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cIDX;
|
|
best_index = i;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (8.0f * lambda < best_t)
|
|
{
|
|
bool delta_encodable_losslessly = false;
|
|
|
|
// First try the DELTA command losslessly.
|
|
if (c.a == prev_a)
|
|
{
|
|
int dr = ((int)c.r - prev_r + 2) & 255;
|
|
int dg = ((int)c.g - prev_g + 2) & 255;
|
|
int db = ((int)c.b - prev_b + 2) & 255;
|
|
|
|
if ((dr <= 3) && (dg <= 3) && (db <= 3))
|
|
{
|
|
delta_encodable_losslessly = true;
|
|
|
|
float bits = 8.0f;
|
|
float trial_t = bits * lambda;
|
|
|
|
assert(trial_t < best_t);
|
|
|
|
best_mse = 0.0f;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cDELTA;
|
|
best_dr = dr - 2;
|
|
best_dg = dg - 2;
|
|
best_db = db - 2;
|
|
}
|
|
}
|
|
|
|
// Try a lossy DELTA command.
|
|
if (!delta_encodable_losslessly)
|
|
{
|
|
for (uint32_t i = 0; i < 64; i++)
|
|
{
|
|
int dr = ((i >> 4) & 3) - 2;
|
|
int dg = ((i >> 2) & 3) - 2;
|
|
int db = (i & 3) - 2;
|
|
|
|
color_rgba trial_c((prev_r + dr) & 255, (prev_g + dg) & 255, (prev_b + db) & 255, prev_a);
|
|
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = 8.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cDELTA;
|
|
best_dr = dr;
|
|
best_dg = dg;
|
|
best_db = db;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (16.0f * lambda < best_t)
|
|
{
|
|
bool luma_encodable_losslessly_in_rgb = false;
|
|
|
|
// First try the LUMA command losslessly in RGB (may not be lossy in alpha).
|
|
{
|
|
int g_diff = (int)c.g - prev_g;
|
|
|
|
int dg = (g_diff + 32) & 255;
|
|
|
|
int dr = (((int)c.r - prev_r) - g_diff + 8) & 255;
|
|
int db = (((int)c.b - prev_b) - g_diff + 8) & 255;
|
|
|
|
if ((dg <= 63) && (dr <= 15) && (db <= 15))
|
|
{
|
|
luma_encodable_losslessly_in_rgb = true;
|
|
|
|
color_rgba trial_c(c.r, c.g, c.b, prev_a);
|
|
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = 16.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cLUMA;
|
|
best_dr = dr - 8;
|
|
best_dg = dg - 32;
|
|
best_db = db - 8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// If we can't use it losslessly, try it lossy.
|
|
if ((!luma_encodable_losslessly_in_rgb) && (params.m_speed_mode != cFastestSpeed))
|
|
{
|
|
if (params.m_speed_mode == cNormalSpeed)
|
|
{
|
|
// Search all encodable LUMA commands.
|
|
for (uint32_t i = 0; i < 16384; i++)
|
|
{
|
|
int dr = ((i >> 6) & 15) - 8;
|
|
int dg = (i & 63) - 32;
|
|
int db = ((i >> 10) & 15) - 8;
|
|
|
|
color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a);
|
|
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = 16.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cLUMA;
|
|
best_dr = dr;
|
|
best_dg = dg;
|
|
best_db = db;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// TODO: This isn't very smart. What if the G delta is encodable but R and/or B aren't?
|
|
const int g_deltas[] = { -24, -16, -14, -12, -10, -8, -6, -4, -3, -2, -1, 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 24 };
|
|
const int TOTAL_G_DELTAS = sizeof(g_deltas) / sizeof(g_deltas[0]);
|
|
|
|
for (int kg = 0; kg < TOTAL_G_DELTAS; kg++)
|
|
{
|
|
const int dg = g_deltas[kg];
|
|
for (uint32_t i = 0; i < 256; i++)
|
|
{
|
|
int dr = (i & 15) - 8;
|
|
int db = ((i >> 4) & 15) - 8;
|
|
|
|
color_rgba trial_c((prev_r + dg + dr) & 255, (prev_g + dg) & 255, (prev_b + dg + db) & 255, prev_a);
|
|
|
|
if (!should_reject(trial_c, c, 4, params))
|
|
{
|
|
float mse = compute_se(trial_c, c, 4, params);
|
|
float bits = 16.0f;
|
|
float trial_t = mse_scale * mse + bits * lambda;
|
|
|
|
if (trial_t < best_t)
|
|
{
|
|
best_mse = mse;
|
|
best_bits = bits;
|
|
best_t = trial_t;
|
|
best_command = cLUMA;
|
|
best_dr = dr;
|
|
best_dg = dg;
|
|
best_db = db;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (best_command)
|
|
{
|
|
case cRUN:
|
|
{
|
|
cur_run_len++;
|
|
if (cur_run_len == 62)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
hash[(prev_r * 3 + prev_g * 5 + prev_b * 7 + prev_a * 11) & 63].set(prev_r, prev_g, prev_b, prev_a);
|
|
|
|
break;
|
|
}
|
|
case cRGB:
|
|
{
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
data.push_back(254);
|
|
data.push_back((uint8_t)c.r);
|
|
data.push_back((uint8_t)c.g);
|
|
data.push_back((uint8_t)c.b);
|
|
hash[(c.r * 3 + c.g * 5 + c.b * 7 + prev_a * 11) & 63].set(c.r, c.g, c.b, prev_a);
|
|
prev_r = c.r;
|
|
prev_g = c.g;
|
|
prev_b = c.b;
|
|
|
|
total_rgb++;
|
|
|
|
break;
|
|
}
|
|
case cRGBA:
|
|
{
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
data.push_back(255);
|
|
data.push_back((uint8_t)c.r);
|
|
data.push_back((uint8_t)c.g);
|
|
data.push_back((uint8_t)c.b);
|
|
data.push_back((uint8_t)c.a);
|
|
hash[(c.r * 3 + c.g * 5 + c.b * 7 + c.a * 11) & 63] = c;
|
|
prev_r = c.r;
|
|
prev_g = c.g;
|
|
prev_b = c.b;
|
|
prev_a = c.a;
|
|
|
|
total_rgba++;
|
|
|
|
break;
|
|
}
|
|
case cIDX:
|
|
{
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
data.push_back(best_index);
|
|
|
|
prev_r = hash[best_index].r;
|
|
prev_g = hash[best_index].g;
|
|
prev_b = hash[best_index].b;
|
|
prev_a = hash[best_index].a;
|
|
|
|
total_index++;
|
|
|
|
break;
|
|
}
|
|
case cDELTA:
|
|
{
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
assert(best_dr >= -2 && best_dr <= 1);
|
|
assert(best_dg >= -2 && best_dg <= 1);
|
|
assert(best_db >= -2 && best_db <= 1);
|
|
|
|
data.push_back(64 + ((best_dr + 2) << 4) + ((best_dg + 2) << 2) + (best_db + 2));
|
|
|
|
uint32_t decoded_r = (prev_r + best_dr) & 0xFF;
|
|
uint32_t decoded_g = (prev_g + best_dg) & 0xFF;
|
|
uint32_t decoded_b = (prev_b + best_db) & 0xFF;
|
|
uint32_t decoded_a = prev_a;
|
|
|
|
hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a);
|
|
|
|
prev_r = decoded_r;
|
|
prev_g = decoded_g;
|
|
prev_b = decoded_b;
|
|
prev_a = decoded_a;
|
|
|
|
total_delta++;
|
|
|
|
break;
|
|
}
|
|
case cLUMA:
|
|
{
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back(0xC0 | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
assert(best_dr >= -8 && best_dr <= 7);
|
|
assert(best_dg >= -32 && best_dg <= 31);
|
|
assert(best_db >= -8 && best_db <= 7);
|
|
|
|
data.push_back((uint8_t)(128 + (best_dg + 32)));
|
|
data.push_back((uint8_t)(((best_dr + 8) << 4) | (best_db + 8)));
|
|
|
|
uint32_t decoded_r = (prev_r + best_dr + best_dg) & 0xFF;
|
|
uint32_t decoded_g = (prev_g + best_dg) & 0xFF;
|
|
uint32_t decoded_b = (prev_b + best_db + best_dg) & 0xFF;
|
|
uint32_t decoded_a = prev_a;
|
|
|
|
hash[(decoded_r * 3 + decoded_g * 5 + decoded_b * 7 + decoded_a * 11) & 63].set(decoded_r, decoded_g, decoded_b, decoded_a);
|
|
|
|
prev_r = decoded_r;
|
|
prev_g = decoded_g;
|
|
prev_b = decoded_b;
|
|
prev_a = decoded_a;
|
|
|
|
total_luma++;
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
if (cur_run_len)
|
|
{
|
|
total_run_pixels += cur_run_len;
|
|
|
|
data.push_back((64 + 128) | (cur_run_len - 1));
|
|
cur_run_len = 0;
|
|
|
|
total_run++;
|
|
}
|
|
|
|
// end padding
|
|
for (uint32_t i = 0; i < 7; i++) {
|
|
data.push_back(0);
|
|
}
|
|
data.push_back(1);
|
|
|
|
if (params.m_print_stats)
|
|
{
|
|
printf("Totals: Run: %u, Run Pixels: %u %3.2f%%, RGB: %u %3.2f%%, RGBA: %u %3.2f%%, INDEX: %u %3.2f%%, DELTA: %u %3.2f%%, LUMA: %u %3.2f%%\n\n",
|
|
total_run,
|
|
total_run_pixels, (total_run_pixels * 100.0f) / orig_img.get_total_pixels(),
|
|
total_rgb, (total_rgb * 100.0f) / orig_img.get_total_pixels(),
|
|
total_rgba, (total_rgba * 100.0f) / orig_img.get_total_pixels(),
|
|
total_index, (total_index * 100.0f) / orig_img.get_total_pixels(),
|
|
total_delta, (total_delta * 100.0f) / orig_img.get_total_pixels(),
|
|
total_luma, (total_luma * 100.0f) / orig_img.get_total_pixels());
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool g_init {false};
|
|
|
|
bool init_qoi_rdo(void) {
|
|
if (g_init) {
|
|
return false;
|
|
}
|
|
init_srgb_to_linear();
|
|
init_oklab_table("", true, false);
|
|
g_init = true;
|
|
return true;
|
|
}
|
|
|
|
bool quit_qoi_rdo(void) {
|
|
if (!g_init) {
|
|
return false;
|
|
}
|
|
g_srgb_to_oklab16.clear();
|
|
|
|
return true;
|
|
}
|
|
|
|
static float lambda_from_quality(int quality) {
|
|
quality = clamp(quality, 1, 100);
|
|
|
|
// TODO: more stuff and log scale
|
|
//return lerp(50000, 100, quality/100.f);
|
|
//return lerp(250'000, 0, sqrtf(quality/100.f));
|
|
//return lerp(1'000'000, 0, sqrtf(quality/100.f));
|
|
//return lerp(1'000'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f));
|
|
//return lerp(250'000, 0, clamp(log10f(quality/100.f)+1, 0.f, 1.f));
|
|
return lerp(250'000, 0, cbrtf(quality/100.f));
|
|
}
|
|
|
|
std::vector<uint8_t> encode_qoi_rdo_simple(const uint8_t* data, const qoi_rdo_desc& desc, int quality) {
|
|
if (!g_init) {
|
|
return {};
|
|
}
|
|
|
|
const float lambda = lambda_from_quality(quality);
|
|
|
|
vector2D<float> smooth_block_mse_scales(desc.width, desc.height);
|
|
|
|
image orig_img(data, desc.width, desc.height, desc.channels);
|
|
|
|
if (false /* m_no_mse_scaling */) {
|
|
smooth_block_mse_scales.set_all(1.0f);
|
|
} else {
|
|
create_smooth_maps(
|
|
smooth_block_mse_scales,
|
|
orig_img,
|
|
{} // smooth_desc
|
|
);
|
|
}
|
|
|
|
std::vector<uint8_t> output_data;
|
|
|
|
if (!encode_rdo_qoi(
|
|
orig_img,
|
|
output_data,
|
|
smooth_block_mse_scales,
|
|
lambda))
|
|
{
|
|
return {};
|
|
}
|
|
|
|
return output_data;
|
|
}
|
|
|