Squashed 'external/libwebp/libwebp/' content from commit dd7364c3c
git-subtree-dir: external/libwebp/libwebp git-subtree-split: dd7364c3cefe0f5c0b3c18c3b1887d353f90fc1f
This commit is contained in:
41
sharpyuv/Makefile.am
Normal file
41
sharpyuv/Makefile.am
Normal file
@ -0,0 +1,41 @@
|
||||
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
|
||||
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
|
||||
|
||||
lib_LTLIBRARIES = libsharpyuv.la
|
||||
|
||||
noinst_LTLIBRARIES =
|
||||
noinst_LTLIBRARIES += libsharpyuv_sse2.la
|
||||
noinst_LTLIBRARIES += libsharpyuv_neon.la
|
||||
|
||||
libsharpyuvinclude_HEADERS =
|
||||
libsharpyuvinclude_HEADERS += sharpyuv.h
|
||||
libsharpyuvinclude_HEADERS += sharpyuv_csp.h
|
||||
noinst_HEADERS =
|
||||
noinst_HEADERS += ../src/dsp/cpu.c
|
||||
noinst_HEADERS += ../src/dsp/cpu.h
|
||||
noinst_HEADERS += ../src/webp/types.h
|
||||
|
||||
libsharpyuv_sse2_la_SOURCES =
|
||||
libsharpyuv_sse2_la_SOURCES += sharpyuv_sse2.c
|
||||
libsharpyuv_sse2_la_CPPFLAGS = $(libsharpyuv_la_CPPFLAGS)
|
||||
libsharpyuv_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
|
||||
|
||||
libsharpyuv_neon_la_SOURCES =
|
||||
libsharpyuv_neon_la_SOURCES += sharpyuv_neon.c
|
||||
libsharpyuv_neon_la_CPPFLAGS = $(libsharpyuv_la_CPPFLAGS)
|
||||
libsharpyuv_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_FLAGS)
|
||||
|
||||
libsharpyuv_la_SOURCES =
|
||||
libsharpyuv_la_SOURCES += sharpyuv_cpu.c sharpyuv_cpu.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv_csp.c sharpyuv_csp.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv_dsp.c sharpyuv_dsp.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv_gamma.c sharpyuv_gamma.h
|
||||
libsharpyuv_la_SOURCES += sharpyuv.c sharpyuv.h
|
||||
|
||||
libsharpyuv_la_CPPFLAGS = $(AM_CPPFLAGS)
|
||||
libsharpyuv_la_LDFLAGS = -no-undefined -version-info 0:1:0 -lm
|
||||
libsharpyuv_la_LIBADD =
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_sse2.la
|
||||
libsharpyuv_la_LIBADD += libsharpyuv_neon.la
|
||||
libsharpyuvincludedir = $(includedir)/webp/sharpyuv
|
||||
pkgconfig_DATA = libsharpyuv.pc
|
11
sharpyuv/libsharpyuv.pc.in
Normal file
11
sharpyuv/libsharpyuv.pc.in
Normal file
@ -0,0 +1,11 @@
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@/webp
|
||||
|
||||
Name: libsharpyuv
|
||||
Description: Library for sharp RGB to YUV conversion
|
||||
Version: @PACKAGE_VERSION@
|
||||
Cflags: -I${includedir}
|
||||
Libs: -L${libdir} -l@webp_libname_prefix@sharpyuv
|
||||
Libs.private: -lm @PTHREAD_CFLAGS@ @PTHREAD_LIBS@
|
41
sharpyuv/libsharpyuv.rc
Normal file
41
sharpyuv/libsharpyuv.rc
Normal file
@ -0,0 +1,41 @@
|
||||
#define APSTUDIO_READONLY_SYMBOLS
|
||||
#include "winres.h"
|
||||
#undef APSTUDIO_READONLY_SYMBOLS
|
||||
|
||||
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
|
||||
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 0,0,2,1
|
||||
PRODUCTVERSION 0,0,2,1
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS 0x40004L
|
||||
FILETYPE 0x2L
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "040904b0"
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Google, Inc."
|
||||
VALUE "FileDescription", "libsharpyuv DLL"
|
||||
VALUE "FileVersion", "0.2.1"
|
||||
VALUE "InternalName", "libsharpyuv.dll"
|
||||
VALUE "LegalCopyright", "Copyright (C) 2023"
|
||||
VALUE "OriginalFilename", "libsharpyuv.dll"
|
||||
VALUE "ProductName", "SharpYuv Library"
|
||||
VALUE "ProductVersion", "0.2.1"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x409, 1200
|
||||
END
|
||||
END
|
||||
|
||||
#endif // English (United States) resources
|
565
sharpyuv/sharpyuv.c
Normal file
565
sharpyuv/sharpyuv.c
Normal file
@ -0,0 +1,565 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int SharpYuvGetVersion(void) {
|
||||
return SHARPYUV_VERSION;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Sharp RGB->YUV conversion
|
||||
|
||||
static const int kNumIterations = 4;
|
||||
|
||||
#define YUV_FIX 16 // fixed-point precision for RGB->YUV
|
||||
static const int kYuvHalf = 1 << (YUV_FIX - 1);
|
||||
|
||||
// Max bit depth so that intermediate calculations fit in 16 bits.
|
||||
static const int kMaxBitDepth = 14;
|
||||
|
||||
// Returns the precision shift to use based on the input rgb_bit_depth.
|
||||
static int GetPrecisionShift(int rgb_bit_depth) {
|
||||
// Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
|
||||
// bits if needed.
|
||||
return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
|
||||
: (kMaxBitDepth - rgb_bit_depth);
|
||||
}
|
||||
|
||||
typedef int16_t fixed_t; // signed type with extra precision for UV
|
||||
typedef uint16_t fixed_y_t; // unsigned type with extra precision for W
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static uint8_t clip_8b(fixed_t v) {
|
||||
return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
|
||||
}
|
||||
|
||||
static uint16_t clip(fixed_t v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static fixed_y_t clip_bit_depth(int y, int bit_depth) {
|
||||
const int max = (1 << bit_depth) - 1;
|
||||
return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static int RGBToGray(int64_t r, int64_t g, int64_t b) {
|
||||
const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
|
||||
return (int)(luma >> YUV_FIX);
|
||||
}
|
||||
|
||||
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
|
||||
int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);
|
||||
const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);
|
||||
const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);
|
||||
const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);
|
||||
return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,
|
||||
transfer_type);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
|
||||
int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
int i;
|
||||
for (i = 0; i < w; ++i) {
|
||||
const uint32_t R =
|
||||
SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t G =
|
||||
SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t B =
|
||||
SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);
|
||||
const uint32_t Y = RGBToGray(R, G, B);
|
||||
dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
|
||||
fixed_t* dst, int uv_w, int rgb_bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
int i;
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int r =
|
||||
ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
|
||||
src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int g =
|
||||
ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
|
||||
src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int b =
|
||||
ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
|
||||
src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);
|
||||
const int W = RGBToGray(r, g, b);
|
||||
dst[0 * uv_w] = (fixed_t)(r - W);
|
||||
dst[1 * uv_w] = (fixed_t)(g - W);
|
||||
dst[2 * uv_w] = (fixed_t)(b - W);
|
||||
dst += 1;
|
||||
src1 += 2;
|
||||
src2 += 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
|
||||
int i;
|
||||
assert(w > 0);
|
||||
for (i = 0; i < w; ++i) {
|
||||
y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
|
||||
const int v0 = (A * 3 + B + 2) >> 2;
|
||||
return clip_bit_depth(v0 + W0, bit_depth);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE int Shift(int v, int shift) {
|
||||
return (shift >= 0) ? (v << shift) : (v >> -shift);
|
||||
}
|
||||
|
||||
static void ImportOneRow(const uint8_t* const r_ptr,
|
||||
const uint8_t* const g_ptr,
|
||||
const uint8_t* const b_ptr,
|
||||
int rgb_step,
|
||||
int rgb_bit_depth,
|
||||
int pic_width,
|
||||
fixed_y_t* const dst) {
|
||||
// Convert the rgb_step from a number of bytes to a number of uint8_t or
|
||||
// uint16_t values depending the bit depth.
|
||||
const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
|
||||
int i;
|
||||
const int w = (pic_width + 1) & ~1;
|
||||
for (i = 0; i < pic_width; ++i) {
|
||||
const int off = i * step;
|
||||
const int shift = GetPrecisionShift(rgb_bit_depth);
|
||||
if (rgb_bit_depth == 8) {
|
||||
dst[i + 0 * w] = Shift(r_ptr[off], shift);
|
||||
dst[i + 1 * w] = Shift(g_ptr[off], shift);
|
||||
dst[i + 2 * w] = Shift(b_ptr[off], shift);
|
||||
} else {
|
||||
dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
|
||||
dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
|
||||
dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
|
||||
}
|
||||
}
|
||||
if (pic_width & 1) { // replicate rightmost pixel
|
||||
dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
|
||||
dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
|
||||
dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
|
||||
}
|
||||
}
|
||||
|
||||
static void InterpolateTwoRows(const fixed_y_t* const best_y,
|
||||
const fixed_t* prev_uv,
|
||||
const fixed_t* cur_uv,
|
||||
const fixed_t* next_uv,
|
||||
int w,
|
||||
fixed_y_t* out1,
|
||||
fixed_y_t* out2,
|
||||
int rgb_bit_depth) {
|
||||
const int uv_w = w >> 1;
|
||||
const int len = (w - 1) >> 1; // length to filter
|
||||
int k = 3;
|
||||
const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
|
||||
while (k-- > 0) { // process each R/G/B segments in turn
|
||||
// special boundary case for i==0
|
||||
out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
|
||||
out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
|
||||
|
||||
SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
|
||||
bit_depth);
|
||||
SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
|
||||
bit_depth);
|
||||
|
||||
// special boundary case for i == w - 1 when w is even
|
||||
if (!(w & 1)) {
|
||||
out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
|
||||
best_y[w - 1 + 0], bit_depth);
|
||||
out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
|
||||
best_y[w - 1 + w], bit_depth);
|
||||
}
|
||||
out1 += w;
|
||||
out2 += w;
|
||||
prev_uv += uv_w;
|
||||
cur_uv += uv_w;
|
||||
next_uv += uv_w;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
|
||||
const int coeffs[4], int sfix) {
|
||||
const int srounder = 1 << (YUV_FIX + sfix - 1);
|
||||
const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
|
||||
coeffs[3] + srounder;
|
||||
return (luma >> (YUV_FIX + sfix));
|
||||
}
|
||||
|
||||
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
|
||||
uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
|
||||
int u_stride, uint8_t* v_ptr, int v_stride,
|
||||
int rgb_bit_depth,
|
||||
int yuv_bit_depth, int width, int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
int i, j;
|
||||
const fixed_t* const best_uv_base = best_uv;
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
|
||||
for (best_uv = best_uv_base, j = 0; j < height; ++j) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
const int off = (i >> 1);
|
||||
const int W = best_y[i];
|
||||
const int r = best_uv[off + 0 * uv_w] + W;
|
||||
const int g = best_uv[off + 1 * uv_w] + W;
|
||||
const int b = best_uv[off + 2 * uv_w] + W;
|
||||
const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
y_ptr[i] = clip_8b(y);
|
||||
} else {
|
||||
((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
|
||||
}
|
||||
}
|
||||
best_y += w;
|
||||
best_uv += (j & 1) * 3 * uv_w;
|
||||
y_ptr += y_stride;
|
||||
}
|
||||
for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
|
||||
for (i = 0; i < uv_w; ++i) {
|
||||
const int off = i;
|
||||
// Note r, g and b values here are off by W, but a constant offset on all
|
||||
// 3 components doesn't change the value of u and v with a YCbCr matrix.
|
||||
const int r = best_uv[off + 0 * uv_w];
|
||||
const int g = best_uv[off + 1 * uv_w];
|
||||
const int b = best_uv[off + 2 * uv_w];
|
||||
const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
|
||||
const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
|
||||
if (yuv_bit_depth <= 8) {
|
||||
u_ptr[i] = clip_8b(u);
|
||||
v_ptr[i] = clip_8b(v);
|
||||
} else {
|
||||
((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
|
||||
((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
|
||||
}
|
||||
}
|
||||
best_uv += 3 * uv_w;
|
||||
u_ptr += u_stride;
|
||||
v_ptr += v_stride;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Main function
|
||||
|
||||
static void* SafeMalloc(uint64_t nmemb, size_t size) {
|
||||
const uint64_t total_size = nmemb * (uint64_t)size;
|
||||
if (total_size != (size_t)total_size) return NULL;
|
||||
return malloc((size_t)total_size);
|
||||
}
|
||||
|
||||
#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
|
||||
|
||||
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
|
||||
const uint8_t* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
|
||||
uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
// we expand the right/bottom border if needed
|
||||
const int w = (width + 1) & ~1;
|
||||
const int h = (height + 1) & ~1;
|
||||
const int uv_w = w >> 1;
|
||||
const int uv_h = h >> 1;
|
||||
uint64_t prev_diff_y_sum = ~0;
|
||||
int j, iter;
|
||||
|
||||
// TODO(skal): allocate one big memory chunk. But for now, it's easier
|
||||
// for valgrind debugging to have several chunks.
|
||||
fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
|
||||
fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
|
||||
fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
|
||||
fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
|
||||
fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
|
||||
fixed_y_t* best_y = best_y_base;
|
||||
fixed_y_t* target_y = target_y_base;
|
||||
fixed_t* best_uv = best_uv_base;
|
||||
fixed_t* target_uv = target_uv_base;
|
||||
const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
|
||||
int ok;
|
||||
assert(w > 0);
|
||||
assert(h > 0);
|
||||
|
||||
if (best_y_base == NULL || best_uv_base == NULL ||
|
||||
target_y_base == NULL || target_uv_base == NULL ||
|
||||
best_rgb_y == NULL || best_rgb_uv == NULL ||
|
||||
tmp_buffer == NULL) {
|
||||
ok = 0;
|
||||
goto End;
|
||||
}
|
||||
|
||||
// Import RGB samples to W/RGB representation.
|
||||
for (j = 0; j < height; j += 2) {
|
||||
const int is_last_row = (j == height - 1);
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
|
||||
// prepare two rows of input
|
||||
ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
|
||||
src1);
|
||||
if (!is_last_row) {
|
||||
ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
|
||||
rgb_step, rgb_bit_depth, width, src2);
|
||||
} else {
|
||||
memcpy(src2, src1, 3 * w * sizeof(*src2));
|
||||
}
|
||||
StoreGray(src1, best_y + 0, w);
|
||||
StoreGray(src2, best_y + w, w);
|
||||
|
||||
UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);
|
||||
UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);
|
||||
memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
r_ptr += 2 * rgb_stride;
|
||||
g_ptr += 2 * rgb_stride;
|
||||
b_ptr += 2 * rgb_stride;
|
||||
}
|
||||
|
||||
// Iterate and resolve clipping conflicts.
|
||||
for (iter = 0; iter < kNumIterations; ++iter) {
|
||||
const fixed_t* cur_uv = best_uv_base;
|
||||
const fixed_t* prev_uv = best_uv_base;
|
||||
uint64_t diff_y_sum = 0;
|
||||
|
||||
best_y = best_y_base;
|
||||
best_uv = best_uv_base;
|
||||
target_y = target_y_base;
|
||||
target_uv = target_uv_base;
|
||||
for (j = 0; j < h; j += 2) {
|
||||
fixed_y_t* const src1 = tmp_buffer + 0 * w;
|
||||
fixed_y_t* const src2 = tmp_buffer + 3 * w;
|
||||
{
|
||||
const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
|
||||
InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
|
||||
src1, src2, rgb_bit_depth);
|
||||
prev_uv = cur_uv;
|
||||
cur_uv = next_uv;
|
||||
}
|
||||
|
||||
UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);
|
||||
UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);
|
||||
|
||||
// update two rows of Y and one row of RGB
|
||||
diff_y_sum +=
|
||||
SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
|
||||
rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
|
||||
SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
|
||||
|
||||
best_y += 2 * w;
|
||||
best_uv += 3 * uv_w;
|
||||
target_y += 2 * w;
|
||||
target_uv += 3 * uv_w;
|
||||
}
|
||||
// test exit condition
|
||||
if (iter > 0) {
|
||||
if (diff_y_sum < diff_y_threshold) break;
|
||||
if (diff_y_sum > prev_diff_y_sum) break;
|
||||
}
|
||||
prev_diff_y_sum = diff_y_sum;
|
||||
}
|
||||
|
||||
// final reconstruction
|
||||
ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
|
||||
u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
|
||||
width, height, yuv_matrix);
|
||||
|
||||
End:
|
||||
free(best_y_base);
|
||||
free(best_uv_base);
|
||||
free(target_y_base);
|
||||
free(target_uv_base);
|
||||
free(best_rgb_y);
|
||||
free(best_rgb_uv);
|
||||
free(tmp_buffer);
|
||||
return ok;
|
||||
}
|
||||
#undef SAFE_ALLOC
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define LOCK_ACCESS \
|
||||
static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&sharpyuv_lock)) return
|
||||
#define UNLOCK_ACCESS_AND_RETURN \
|
||||
do { \
|
||||
(void)pthread_mutex_unlock(&sharpyuv_lock); \
|
||||
return; \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define LOCK_ACCESS do {} while (0)
|
||||
#define UNLOCK_ACCESS_AND_RETURN return
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Hidden exported init function.
|
||||
// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
|
||||
// users can declare it as extern and call it with an alternate VP8CPUInfo
|
||||
// function.
|
||||
extern VP8CPUInfo SharpYuvGetCPUInfo;
|
||||
SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
|
||||
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
|
||||
static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
|
||||
(VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
|
||||
LOCK_ACCESS;
|
||||
// Only update SharpYuvGetCPUInfo when called from external code to avoid a
|
||||
// race on reading the value in SharpYuvConvert().
|
||||
if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
|
||||
SharpYuvGetCPUInfo = cpu_info_func;
|
||||
}
|
||||
if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) {
|
||||
UNLOCK_ACCESS_AND_RETURN;
|
||||
}
|
||||
|
||||
SharpYuvInitDsp();
|
||||
SharpYuvInitGammaTables();
|
||||
|
||||
sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
|
||||
UNLOCK_ACCESS_AND_RETURN;
|
||||
}
|
||||
|
||||
int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
|
||||
int rgb_step, int rgb_stride, int rgb_bit_depth,
|
||||
void* y_ptr, int y_stride, void* u_ptr, int u_stride,
|
||||
void* v_ptr, int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvConversionMatrix* yuv_matrix) {
|
||||
SharpYuvOptions options;
|
||||
options.yuv_matrix = yuv_matrix;
|
||||
options.transfer_type = kSharpYuvTransferFunctionSrgb;
|
||||
return SharpYuvConvertWithOptions(
|
||||
r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,
|
||||
u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);
|
||||
}
|
||||
|
||||
int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,
|
||||
SharpYuvOptions* options, int version) {
|
||||
const int major = (version >> 24);
|
||||
const int minor = (version >> 16) & 0xff;
|
||||
if (options == NULL || yuv_matrix == NULL ||
|
||||
(major == SHARPYUV_VERSION_MAJOR && major == 0 &&
|
||||
minor != SHARPYUV_VERSION_MINOR) ||
|
||||
(major != SHARPYUV_VERSION_MAJOR)) {
|
||||
return 0;
|
||||
}
|
||||
options->yuv_matrix = yuv_matrix;
|
||||
options->transfer_type = kSharpYuvTransferFunctionSrgb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
|
||||
const void* b_ptr, int rgb_step, int rgb_stride,
|
||||
int rgb_bit_depth, void* y_ptr, int y_stride,
|
||||
void* u_ptr, int u_stride, void* v_ptr,
|
||||
int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvOptions* options) {
|
||||
const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;
|
||||
SharpYuvTransferFunctionType transfer_type = options->transfer_type;
|
||||
SharpYuvConversionMatrix scaled_matrix;
|
||||
const int rgb_max = (1 << rgb_bit_depth) - 1;
|
||||
const int rgb_round = 1 << (rgb_bit_depth - 1);
|
||||
const int yuv_max = (1 << yuv_bit_depth) - 1;
|
||||
const int sfix = GetPrecisionShift(rgb_bit_depth);
|
||||
|
||||
if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
|
||||
r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
|
||||
u_ptr == NULL || v_ptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
|
||||
rgb_bit_depth != 16) {
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
|
||||
return 0;
|
||||
}
|
||||
if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
|
||||
// Step/stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
if (yuv_bit_depth > 8 &&
|
||||
(y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
|
||||
// Stride should be even for uint16_t buffers.
|
||||
return 0;
|
||||
}
|
||||
// The address of the function pointer is used to avoid a read race.
|
||||
SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
|
||||
|
||||
// Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
|
||||
// rgb->yuv conversion matrix.
|
||||
if (rgb_bit_depth == yuv_bit_depth) {
|
||||
memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < 3; ++i) {
|
||||
scaled_matrix.rgb_to_y[i] =
|
||||
(yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_u[i] =
|
||||
(yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
|
||||
scaled_matrix.rgb_to_v[i] =
|
||||
(yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
|
||||
}
|
||||
}
|
||||
// Also incorporate precision change scaling.
|
||||
scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
|
||||
scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
|
||||
scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
|
||||
|
||||
return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
|
||||
rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
|
||||
v_ptr, v_stride, yuv_bit_depth, width, height,
|
||||
&scaled_matrix, transfer_type);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
174
sharpyuv/sharpyuv.h
Normal file
174
sharpyuv/sharpyuv.h
Normal file
@ -0,0 +1,174 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Sharp RGB to YUV conversion.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef SHARPYUV_EXTERN
|
||||
#ifdef WEBP_EXTERN
|
||||
#define SHARPYUV_EXTERN WEBP_EXTERN
|
||||
#else
|
||||
// This explicitly marks library functions and allows for changing the
|
||||
// signature for e.g., Windows DLL builds.
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
#define SHARPYUV_EXTERN extern __attribute__((visibility("default")))
|
||||
#else
|
||||
#if defined(_MSC_VER) && defined(WEBP_DLL)
|
||||
#define SHARPYUV_EXTERN __declspec(dllexport)
|
||||
#else
|
||||
#define SHARPYUV_EXTERN extern
|
||||
#endif /* _MSC_VER && WEBP_DLL */
|
||||
#endif /* __GNUC__ >= 4 */
|
||||
#endif /* WEBP_EXTERN */
|
||||
#endif /* SHARPYUV_EXTERN */
|
||||
|
||||
#ifndef SHARPYUV_INLINE
|
||||
#ifdef WEBP_INLINE
|
||||
#define SHARPYUV_INLINE WEBP_INLINE
|
||||
#else
|
||||
#ifndef _MSC_VER
|
||||
#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
|
||||
#define SHARPYUV_INLINE inline
|
||||
#else
|
||||
#define SHARPYUV_INLINE
|
||||
#endif
|
||||
#else
|
||||
#define SHARPYUV_INLINE __forceinline
|
||||
#endif /* _MSC_VER */
|
||||
#endif /* WEBP_INLINE */
|
||||
#endif /* SHARPYUV_INLINE */
|
||||
|
||||
// SharpYUV API version following the convention from semver.org
|
||||
#define SHARPYUV_VERSION_MAJOR 0
|
||||
#define SHARPYUV_VERSION_MINOR 4
|
||||
#define SHARPYUV_VERSION_PATCH 0
|
||||
// Version as a uint32_t. The major number is the high 8 bits.
|
||||
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
|
||||
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
|
||||
(((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
|
||||
#define SHARPYUV_VERSION \
|
||||
SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
|
||||
SHARPYUV_VERSION_PATCH)
|
||||
|
||||
// Returns the library's version number, packed in hexadecimal. See
|
||||
// SHARPYUV_VERSION.
|
||||
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
|
||||
|
||||
// RGB to YUV conversion matrix, in 16 bit fixed point.
|
||||
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
|
||||
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
|
||||
// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
|
||||
// Then y, u and v values are divided by 1<<16 and rounded.
|
||||
typedef struct {
|
||||
int rgb_to_y[4];
|
||||
int rgb_to_u[4];
|
||||
int rgb_to_v[4];
|
||||
} SharpYuvConversionMatrix;
|
||||
|
||||
typedef struct SharpYuvOptions SharpYuvOptions;
|
||||
|
||||
// Enums for transfer functions, as defined in H.273,
|
||||
// https://www.itu.int/rec/T-REC-H.273-202107-I/en
|
||||
typedef enum SharpYuvTransferFunctionType {
|
||||
// 0 is reserved
|
||||
kSharpYuvTransferFunctionBt709 = 1,
|
||||
// 2 is unspecified
|
||||
// 3 is reserved
|
||||
kSharpYuvTransferFunctionBt470M = 4,
|
||||
kSharpYuvTransferFunctionBt470Bg = 5,
|
||||
kSharpYuvTransferFunctionBt601 = 6,
|
||||
kSharpYuvTransferFunctionSmpte240 = 7,
|
||||
kSharpYuvTransferFunctionLinear = 8,
|
||||
kSharpYuvTransferFunctionLog100 = 9,
|
||||
kSharpYuvTransferFunctionLog100_Sqrt10 = 10,
|
||||
kSharpYuvTransferFunctionIec61966 = 11,
|
||||
kSharpYuvTransferFunctionBt1361 = 12,
|
||||
kSharpYuvTransferFunctionSrgb = 13,
|
||||
kSharpYuvTransferFunctionBt2020_10Bit = 14,
|
||||
kSharpYuvTransferFunctionBt2020_12Bit = 15,
|
||||
kSharpYuvTransferFunctionSmpte2084 = 16, // PQ
|
||||
kSharpYuvTransferFunctionSmpte428 = 17,
|
||||
kSharpYuvTransferFunctionHlg = 18,
|
||||
kSharpYuvTransferFunctionNum
|
||||
} SharpYuvTransferFunctionType;
|
||||
|
||||
// Converts RGB to YUV420 using a downsampling algorithm that minimizes
|
||||
// artefacts caused by chroma subsampling.
|
||||
// This is slower than standard downsampling (averaging of 4 UV values).
|
||||
// Assumes that the image will be upsampled using a bilinear filter. If nearest
|
||||
// neighbor is used instead, the upsampled image might look worse than with
|
||||
// standard downsampling.
|
||||
// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
|
||||
// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
|
||||
// rgb_step: distance in bytes between two horizontally adjacent pixels on the
|
||||
// r, g and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_stride: distance in bytes between two vertically adjacent pixels on the
|
||||
// r, g, and b channels. If rgb_bit_depth is > 8, it should be a
|
||||
// multiple of 2.
|
||||
// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
|
||||
// Note: 16 bit input is truncated to 14 bits before conversion to yuv.
|
||||
// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
|
||||
// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should
|
||||
// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
|
||||
// otherwise.
|
||||
// y_stride, u_stride, v_stride: distance in bytes between two vertically
|
||||
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
|
||||
// should be multiples of 2.
|
||||
// width, height: width and height of the image in pixels
|
||||
// This function calls SharpYuvConvertWithOptions with a default transfer
|
||||
// function of kSharpYuvTransferFunctionSrgb.
|
||||
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
|
||||
const void* b_ptr, int rgb_step,
|
||||
int rgb_stride, int rgb_bit_depth,
|
||||
void* y_ptr, int y_stride, void* u_ptr,
|
||||
int u_stride, void* v_ptr, int v_stride,
|
||||
int yuv_bit_depth, int width, int height,
|
||||
const SharpYuvConversionMatrix* yuv_matrix);
|
||||
|
||||
struct SharpYuvOptions {
|
||||
// This matrix cannot be NULL and can be initialized by
|
||||
// SharpYuvComputeConversionMatrix.
|
||||
const SharpYuvConversionMatrix* yuv_matrix;
|
||||
SharpYuvTransferFunctionType transfer_type;
|
||||
};
|
||||
|
||||
// Internal, version-checked, entry point
|
||||
SHARPYUV_EXTERN int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix*,
|
||||
SharpYuvOptions*, int);
|
||||
|
||||
// Should always be called, to initialize a fresh SharpYuvOptions
|
||||
// structure before modification. SharpYuvOptionsInit() must have succeeded
|
||||
// before using the 'options' object.
|
||||
static SHARPYUV_INLINE int SharpYuvOptionsInit(
|
||||
const SharpYuvConversionMatrix* yuv_matrix, SharpYuvOptions* options) {
|
||||
return SharpYuvOptionsInitInternal(yuv_matrix, options, SHARPYUV_VERSION);
|
||||
}
|
||||
|
||||
SHARPYUV_EXTERN int SharpYuvConvertWithOptions(
|
||||
const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
|
||||
int rgb_stride, int rgb_bit_depth, void* y_ptr, int y_stride, void* u_ptr,
|
||||
int u_stride, void* v_ptr, int v_stride, int yuv_bit_depth, int width,
|
||||
int height, const SharpYuvOptions* options);
|
||||
|
||||
// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
|
||||
// support (it's rarely used in practice, especially for images).
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_H_
|
14
sharpyuv/sharpyuv_cpu.c
Normal file
14
sharpyuv/sharpyuv_cpu.c
Normal file
@ -0,0 +1,14 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
|
||||
// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The
|
||||
// function pointer is renamed in sharpyuv_cpu.h.
|
||||
#include "src/dsp/cpu.c"
|
22
sharpyuv/sharpyuv_cpu.h
Normal file
22
sharpyuv/sharpyuv_cpu.h
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds.
|
||||
// SharpYuvInit() replaces the use of the function pointer.
|
||||
#undef WEBP_EXTERN
|
||||
#define WEBP_EXTERN extern
|
||||
#define VP8GetCPUInfo SharpYuvGetCPUInfo
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_CPU_H_
|
110
sharpyuv/sharpyuv_csp.c
Normal file
110
sharpyuv/sharpyuv_csp.c
Normal file
@ -0,0 +1,110 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_csp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
|
||||
|
||||
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix) {
|
||||
const float kr = yuv_color_space->kr;
|
||||
const float kb = yuv_color_space->kb;
|
||||
const float kg = 1.0f - kr - kb;
|
||||
const float cr = 0.5f / (1.0f - kb);
|
||||
const float cb = 0.5f / (1.0f - kr);
|
||||
|
||||
const int shift = yuv_color_space->bit_depth - 8;
|
||||
|
||||
const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
|
||||
float scale_y = 1.0f;
|
||||
float add_y = 0.0f;
|
||||
float scale_u = cr;
|
||||
float scale_v = cb;
|
||||
float add_uv = (float)(128 << shift);
|
||||
assert(yuv_color_space->bit_depth >= 8);
|
||||
|
||||
if (yuv_color_space->range == kSharpYuvRangeLimited) {
|
||||
scale_y *= (219 << shift) / denom;
|
||||
scale_u *= (224 << shift) / denom;
|
||||
scale_v *= (224 << shift) / denom;
|
||||
add_y = (float)(16 << shift);
|
||||
}
|
||||
|
||||
matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
|
||||
matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
|
||||
matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
|
||||
matrix->rgb_to_y[3] = ToFixed16(add_y);
|
||||
|
||||
matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
|
||||
matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
|
||||
matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
|
||||
matrix->rgb_to_u[3] = ToFixed16(add_uv);
|
||||
|
||||
matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
|
||||
matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
|
||||
matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
|
||||
matrix->rgb_to_v[3] = ToFixed16(add_uv);
|
||||
}
|
||||
|
||||
// Matrices are in YUV_FIX fixed point precision.
|
||||
// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
|
||||
static const SharpYuvConversionMatrix kWebpMatrix = {
|
||||
{16839, 33059, 6420, 16 << 16},
|
||||
{-9719, -19081, 28800, 128 << 16},
|
||||
{28800, -24116, -4684, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
|
||||
{16829, 33039, 6416, 16 << 16},
|
||||
{-9714, -19071, 28784, 128 << 16},
|
||||
{28784, -24103, -4681, 128 << 16},
|
||||
};
|
||||
// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec601FullMatrix = {
|
||||
{19595, 38470, 7471, 0},
|
||||
{-11058, -21710, 32768, 128 << 16},
|
||||
{32768, -27439, -5329, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
|
||||
static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
|
||||
{11966, 40254, 4064, 16 << 16},
|
||||
{-6596, -22189, 28784, 128 << 16},
|
||||
{28784, -26145, -2639, 128 << 16},
|
||||
};
|
||||
// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
|
||||
static const SharpYuvConversionMatrix kRec709FullMatrix = {
|
||||
{13933, 46871, 4732, 0},
|
||||
{-7509, -25259, 32768, 128 << 16},
|
||||
{32768, -29763, -3005, 128 << 16},
|
||||
};
|
||||
|
||||
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type) {
|
||||
switch (matrix_type) {
|
||||
case kSharpYuvMatrixWebp:
|
||||
return &kWebpMatrix;
|
||||
case kSharpYuvMatrixRec601Limited:
|
||||
return &kRec601LimitedMatrix;
|
||||
case kSharpYuvMatrixRec601Full:
|
||||
return &kRec601FullMatrix;
|
||||
case kSharpYuvMatrixRec709Limited:
|
||||
return &kRec709LimitedMatrix;
|
||||
case kSharpYuvMatrixRec709Full:
|
||||
return &kRec709FullMatrix;
|
||||
case kSharpYuvMatrixNum:
|
||||
return NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
60
sharpyuv/sharpyuv_csp.h
Normal file
60
sharpyuv/sharpyuv_csp.h
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Colorspace utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Range of YUV values.
|
||||
typedef enum {
|
||||
kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit)
|
||||
kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit)
|
||||
} SharpYuvRange;
|
||||
|
||||
// Constants that define a YUV color space.
|
||||
typedef struct {
|
||||
// Kr and Kb are defined such that:
|
||||
// Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
|
||||
float kr;
|
||||
float kb;
|
||||
int bit_depth; // 8, 10 or 12
|
||||
SharpYuvRange range;
|
||||
} SharpYuvColorSpace;
|
||||
|
||||
// Fills in 'matrix' for the given YUVColorSpace.
|
||||
SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
|
||||
const SharpYuvColorSpace* yuv_color_space,
|
||||
SharpYuvConversionMatrix* matrix);
|
||||
|
||||
// Enums for precomputed conversion matrices.
|
||||
typedef enum {
|
||||
kSharpYuvMatrixWebp = 0,
|
||||
kSharpYuvMatrixRec601Limited,
|
||||
kSharpYuvMatrixRec601Full,
|
||||
kSharpYuvMatrixRec709Limited,
|
||||
kSharpYuvMatrixRec709Full,
|
||||
kSharpYuvMatrixNum
|
||||
} SharpYuvMatrixType;
|
||||
|
||||
// Returns a pointer to a matrix for one of the predefined colorspaces.
|
||||
SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
|
||||
SharpYuvMatrixType matrix_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_CSP_H_
|
104
sharpyuv/sharpyuv_dsp.c
Normal file
104
sharpyuv/sharpyuv_dsp.c
Normal file
@ -0,0 +1,104 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static uint16_t clip(int v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len, int bit_depth) {
|
||||
uint64_t diff = 0;
|
||||
int i;
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip(new_y, max_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth) {
|
||||
int i;
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
for (i = 0; i < len; ++i, ++A, ++B) {
|
||||
const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
|
||||
const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
|
||||
out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
|
||||
int len);
|
||||
void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
extern VP8CPUInfo SharpYuvGetCPUInfo;
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
void SharpYuvInitDsp(void) {
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_C;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (SharpYuvGetCPUInfo != NULL) {
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (SharpYuvGetCPUInfo(kSSE2)) {
|
||||
InitSharpYuvSSE2();
|
||||
}
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
}
|
||||
|
||||
#if defined(WEBP_HAVE_NEON)
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) {
|
||||
InitSharpYuvNEON();
|
||||
}
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
assert(SharpYuvUpdateY != NULL);
|
||||
assert(SharpYuvUpdateRGB != NULL);
|
||||
assert(SharpYuvFilterRow != NULL);
|
||||
}
|
28
sharpyuv/sharpyuv_dsp.h
Normal file
28
sharpyuv/sharpyuv_dsp.h
Normal file
@ -0,0 +1,28 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv_cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len, int bit_depth);
|
||||
extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth);
|
||||
|
||||
void SharpYuvInitDsp(void);
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_
|
419
sharpyuv/sharpyuv_gamma.c
Normal file
419
sharpyuv/sharpyuv_gamma.c
Normal file
@ -0,0 +1,419 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#include "sharpyuv/sharpyuv_gamma.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
// Gamma correction compensates loss of resolution during chroma subsampling.
|
||||
// Size of pre-computed table for converting from gamma to linear.
|
||||
#define GAMMA_TO_LINEAR_TAB_BITS 10
|
||||
#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
|
||||
static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
|
||||
#define LINEAR_TO_GAMMA_TAB_BITS 9
|
||||
#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
|
||||
static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
|
||||
|
||||
static const double kGammaF = 1. / 0.45;
|
||||
#define GAMMA_TO_LINEAR_BITS 16
|
||||
|
||||
static volatile int kGammaTablesSOk = 0;
|
||||
void SharpYuvInitGammaTables(void) {
|
||||
assert(GAMMA_TO_LINEAR_BITS <= 16);
|
||||
if (!kGammaTablesSOk) {
|
||||
int v;
|
||||
const double a = 0.09929682680944;
|
||||
const double thresh = 0.018053968510807;
|
||||
const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
|
||||
// Precompute gamma to linear table.
|
||||
{
|
||||
const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
|
||||
const double a_rec = 1. / (1. + a);
|
||||
for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
|
||||
const double g = norm * v;
|
||||
double value;
|
||||
if (g <= thresh * 4.5) {
|
||||
value = g / 4.5;
|
||||
} else {
|
||||
value = pow(a_rec * (g + a), kGammaF);
|
||||
}
|
||||
kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
|
||||
kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
|
||||
}
|
||||
// Precompute linear to gamma table.
|
||||
{
|
||||
const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
|
||||
for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
|
||||
const double g = scale * v;
|
||||
double value;
|
||||
if (g <= thresh) {
|
||||
value = 4.5 * g;
|
||||
} else {
|
||||
value = (1. + a) * pow(g, 1. / kGammaF) - a;
|
||||
}
|
||||
kLinearToGammaTabS[v] =
|
||||
(uint32_t)(final_scale * value + 0.5);
|
||||
}
|
||||
// to prevent small rounding errors to cause read-overflow:
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
|
||||
kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
|
||||
}
|
||||
kGammaTablesSOk = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE int Shift(int v, int shift) {
|
||||
return (shift >= 0) ? (v << shift) : (v >> -shift);
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
|
||||
int tab_pos_shift_right,
|
||||
int tab_value_shift) {
|
||||
const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
|
||||
// fractional part, in 'tab_pos_shift' fixed-point precision
|
||||
const uint32_t x = v - (tab_pos << tab_pos_shift_right); // fractional part
|
||||
// v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
|
||||
const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
|
||||
const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
|
||||
// Final interpolation.
|
||||
const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
|
||||
const int half =
|
||||
(tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
|
||||
const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t ToLinearSrgb(uint16_t v, int bit_depth) {
|
||||
const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
|
||||
if (shift > 0) {
|
||||
return kGammaToLinearTabS[v << shift];
|
||||
}
|
||||
return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
|
||||
}
|
||||
|
||||
static uint16_t FromLinearSrgb(uint32_t value, int bit_depth) {
|
||||
return FixedPointInterpolation(
|
||||
value, kLinearToGammaTabS,
|
||||
(GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
|
||||
bit_depth - GAMMA_TO_LINEAR_BITS);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CLAMP(x, low, high) \
|
||||
(((x) < (low)) ? (low) : (((high) < (x)) ? (high) : (x)))
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
static WEBP_INLINE float Roundf(float x) {
|
||||
if (x < 0)
|
||||
return (float)ceil((double)(x - 0.5f));
|
||||
else
|
||||
return (float)floor((double)(x + 0.5f));
|
||||
}
|
||||
|
||||
static WEBP_INLINE float Powf(float base, float exp) {
|
||||
return (float)pow((double)base, (double)exp);
|
||||
}
|
||||
|
||||
static WEBP_INLINE float Log10f(float x) { return (float)log10((double)x); }
|
||||
|
||||
static float ToLinear709(float gamma) {
|
||||
if (gamma < 0.f) {
|
||||
return 0.f;
|
||||
} else if (gamma < 4.5f * 0.018053968510807f) {
|
||||
return gamma / 4.5f;
|
||||
} else if (gamma < 1.f) {
|
||||
return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float FromLinear709(float linear) {
|
||||
if (linear < 0.f) {
|
||||
return 0.f;
|
||||
} else if (linear < 0.018053968510807f) {
|
||||
return linear * 4.5f;
|
||||
} else if (linear < 1.f) {
|
||||
return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float ToLinear470M(float gamma) {
|
||||
return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.2f);
|
||||
}
|
||||
|
||||
static float FromLinear470M(float linear) {
|
||||
return Powf(CLAMP(linear, 0.f, 1.f), 2.2f);
|
||||
}
|
||||
|
||||
static float ToLinear470Bg(float gamma) {
|
||||
return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.8f);
|
||||
}
|
||||
|
||||
static float FromLinear470Bg(float linear) {
|
||||
return Powf(CLAMP(linear, 0.f, 1.f), 2.8f);
|
||||
}
|
||||
|
||||
static float ToLinearSmpte240(float gamma) {
|
||||
if (gamma < 0.f) {
|
||||
return 0.f;
|
||||
} else if (gamma < 4.f * 0.022821585529445f) {
|
||||
return gamma / 4.f;
|
||||
} else if (gamma < 1.f) {
|
||||
return Powf((gamma + 0.111572195921731f) / 1.111572195921731f, 1.f / 0.45f);
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float FromLinearSmpte240(float linear) {
|
||||
if (linear < 0.f) {
|
||||
return 0.f;
|
||||
} else if (linear < 0.022821585529445f) {
|
||||
return linear * 4.f;
|
||||
} else if (linear < 1.f) {
|
||||
return 1.111572195921731f * Powf(linear, 0.45f) - 0.111572195921731f;
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float ToLinearLog100(float gamma) {
|
||||
return (gamma < 0.01f) ? 0.0f : 1.0f + Log10f(MIN(gamma, 1.f)) / 2.0f;
|
||||
}
|
||||
|
||||
static float FromLinearLog100(float linear) {
|
||||
// The function is non-bijective so choose the middle of [0, 0.01].
|
||||
const float mid_interval = 0.01f / 2.f;
|
||||
return (linear <= 0.0f) ? mid_interval
|
||||
: Powf(10.0f, 2.f * (MIN(linear, 1.f) - 1.0f));
|
||||
}
|
||||
|
||||
static float ToLinearLog100Sqrt10(float gamma) {
|
||||
return (gamma < 0.00316227766f) ? 0.0f
|
||||
: 1.0f + Log10f(MIN(gamma, 1.f)) / 2.5f;
|
||||
}
|
||||
|
||||
static float FromLinearLog100Sqrt10(float linear) {
|
||||
// The function is non-bijective so choose the middle of [0, 0.00316227766f[.
|
||||
const float mid_interval = 0.00316227766f / 2.f;
|
||||
return (linear < 0.0f) ? mid_interval
|
||||
: Powf(10.0f, 2.5f * (MIN(linear, 1.f) - 1.0f));
|
||||
}
|
||||
|
||||
static float ToLinearIec61966(float gamma) {
|
||||
if (gamma <= -4.5f * 0.018053968510807f) {
|
||||
return Powf((-gamma + 0.09929682680944f) / -1.09929682680944f, 1.f / 0.45f);
|
||||
} else if (gamma < 4.5f * 0.018053968510807f) {
|
||||
return gamma / 4.5f;
|
||||
}
|
||||
return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
|
||||
}
|
||||
|
||||
static float FromLinearIec61966(float linear) {
|
||||
if (linear <= -0.018053968510807f) {
|
||||
return -1.09929682680944f * Powf(-linear, 0.45f) + 0.09929682680944f;
|
||||
} else if (linear < 0.018053968510807f) {
|
||||
return linear * 4.5f;
|
||||
}
|
||||
return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
|
||||
}
|
||||
|
||||
static float ToLinearBt1361(float gamma) {
|
||||
if (gamma < -0.25f) {
|
||||
return -0.25f;
|
||||
} else if (gamma < 0.f) {
|
||||
return Powf((gamma - 0.02482420670236f) / -0.27482420670236f, 1.f / 0.45f) /
|
||||
-4.f;
|
||||
} else if (gamma < 4.5f * 0.018053968510807f) {
|
||||
return gamma / 4.5f;
|
||||
} else if (gamma < 1.f) {
|
||||
return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float FromLinearBt1361(float linear) {
|
||||
if (linear < -0.25f) {
|
||||
return -0.25f;
|
||||
} else if (linear < 0.f) {
|
||||
return -0.27482420670236f * Powf(-4.f * linear, 0.45f) + 0.02482420670236f;
|
||||
} else if (linear < 0.018053968510807f) {
|
||||
return linear * 4.5f;
|
||||
} else if (linear < 1.f) {
|
||||
return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
|
||||
}
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
static float ToLinearPq(float gamma) {
|
||||
if (gamma > 0.f) {
|
||||
const float pow_gamma = Powf(gamma, 32.f / 2523.f);
|
||||
const float num = MAX(pow_gamma - 107.f / 128.f, 0.0f);
|
||||
const float den = MAX(2413.f / 128.f - 2392.f / 128.f * pow_gamma, FLT_MIN);
|
||||
return Powf(num / den, 4096.f / 653.f);
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
static float FromLinearPq(float linear) {
|
||||
if (linear > 0.f) {
|
||||
const float pow_linear = Powf(linear, 653.f / 4096.f);
|
||||
const float num = 107.f / 128.f + 2413.f / 128.f * pow_linear;
|
||||
const float den = 1.0f + 2392.f / 128.f * pow_linear;
|
||||
return Powf(num / den, 2523.f / 32.f);
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
static float ToLinearSmpte428(float gamma) {
|
||||
return Powf(0.91655527974030934f * MAX(gamma, 0.f), 1.f / 2.6f);
|
||||
}
|
||||
|
||||
static float FromLinearSmpte428(float linear) {
|
||||
return Powf(MAX(linear, 0.f), 2.6f) / 0.91655527974030934f;
|
||||
}
|
||||
|
||||
// Conversion in BT.2100 requires RGB info. Simplify to gamma correction here.
|
||||
static float ToLinearHlg(float gamma) {
|
||||
if (gamma < 0.f) {
|
||||
return 0.f;
|
||||
} else if (gamma <= 0.5f) {
|
||||
return Powf((gamma * gamma) * (1.f / 3.f), 1.2f);
|
||||
}
|
||||
return Powf((expf((gamma - 0.55991073f) / 0.17883277f) + 0.28466892f) / 12.0f,
|
||||
1.2f);
|
||||
}
|
||||
|
||||
static float FromLinearHlg(float linear) {
|
||||
linear = Powf(linear, 1.f / 1.2f);
|
||||
if (linear < 0.f) {
|
||||
return 0.f;
|
||||
} else if (linear <= (1.f / 12.f)) {
|
||||
return sqrtf(3.f * linear);
|
||||
}
|
||||
return 0.17883277f * logf(12.f * linear - 0.28466892f) + 0.55991073f;
|
||||
}
|
||||
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
float v_float, linear;
|
||||
if (transfer_type == kSharpYuvTransferFunctionSrgb) {
|
||||
return ToLinearSrgb(v, bit_depth);
|
||||
}
|
||||
v_float = (float)v / ((1 << bit_depth) - 1);
|
||||
switch (transfer_type) {
|
||||
case kSharpYuvTransferFunctionBt709:
|
||||
case kSharpYuvTransferFunctionBt601:
|
||||
case kSharpYuvTransferFunctionBt2020_10Bit:
|
||||
case kSharpYuvTransferFunctionBt2020_12Bit:
|
||||
linear = ToLinear709(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470M:
|
||||
linear = ToLinear470M(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470Bg:
|
||||
linear = ToLinear470Bg(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte240:
|
||||
linear = ToLinearSmpte240(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLinear:
|
||||
return v;
|
||||
case kSharpYuvTransferFunctionLog100:
|
||||
linear = ToLinearLog100(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLog100_Sqrt10:
|
||||
linear = ToLinearLog100Sqrt10(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionIec61966:
|
||||
linear = ToLinearIec61966(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt1361:
|
||||
linear = ToLinearBt1361(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte2084:
|
||||
linear = ToLinearPq(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte428:
|
||||
linear = ToLinearSmpte428(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionHlg:
|
||||
linear = ToLinearHlg(v_float);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
linear = 0;
|
||||
break;
|
||||
}
|
||||
return (uint32_t)Roundf(linear * ((1 << 16) - 1));
|
||||
}
|
||||
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type) {
|
||||
float v_float, linear;
|
||||
if (transfer_type == kSharpYuvTransferFunctionSrgb) {
|
||||
return FromLinearSrgb(v, bit_depth);
|
||||
}
|
||||
v_float = (float)v / ((1 << 16) - 1);
|
||||
switch (transfer_type) {
|
||||
case kSharpYuvTransferFunctionBt709:
|
||||
case kSharpYuvTransferFunctionBt601:
|
||||
case kSharpYuvTransferFunctionBt2020_10Bit:
|
||||
case kSharpYuvTransferFunctionBt2020_12Bit:
|
||||
linear = FromLinear709(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470M:
|
||||
linear = FromLinear470M(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt470Bg:
|
||||
linear = FromLinear470Bg(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte240:
|
||||
linear = FromLinearSmpte240(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLinear:
|
||||
return v;
|
||||
case kSharpYuvTransferFunctionLog100:
|
||||
linear = FromLinearLog100(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionLog100_Sqrt10:
|
||||
linear = FromLinearLog100Sqrt10(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionIec61966:
|
||||
linear = FromLinearIec61966(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionBt1361:
|
||||
linear = FromLinearBt1361(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte2084:
|
||||
linear = FromLinearPq(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionSmpte428:
|
||||
linear = FromLinearSmpte428(v_float);
|
||||
break;
|
||||
case kSharpYuvTransferFunctionHlg:
|
||||
linear = FromLinearHlg(v_float);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
linear = 0;
|
||||
break;
|
||||
}
|
||||
return (uint16_t)Roundf(linear * ((1 << bit_depth) - 1));
|
||||
}
|
38
sharpyuv/sharpyuv_gamma.h
Normal file
38
sharpyuv/sharpyuv_gamma.h
Normal file
@ -0,0 +1,38 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Gamma correction utilities.
|
||||
|
||||
#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
||||
|
||||
#include "sharpyuv/sharpyuv.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Initializes precomputed tables. Must be called once before calling
|
||||
// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
|
||||
void SharpYuvInitGammaTables(void);
|
||||
|
||||
// Converts a 'bit_depth'-bit gamma color value to a 16-bit linear value.
|
||||
uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type);
|
||||
|
||||
// Converts a 16-bit linear color value to a 'bit_depth'-bit gamma value.
|
||||
uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth,
|
||||
SharpYuvTransferFunctionType transfer_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
|
181
sharpyuv/sharpyuv_neon.c
Normal file
181
sharpyuv/sharpyuv_neon.c
Normal file
@ -0,0 +1,181 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_NEON)
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <arm_neon.h>
|
||||
|
||||
static uint16_t clip_NEON(int v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
const int16x8_t max = vdupq_n_s16(max_y);
|
||||
uint64x2_t sum = vdupq_n_u64(0);
|
||||
uint64_t diff;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
|
||||
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
|
||||
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_y
|
||||
const int16x8_t F = vaddq_s16(C, D); // new_y
|
||||
const uint16x8_t H =
|
||||
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
|
||||
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
|
||||
vst1q_u16(dst + i, H);
|
||||
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
|
||||
}
|
||||
diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)(dst[i]) + diff_y;
|
||||
dst[i] = clip_NEON(new_y, max_y);
|
||||
diff += (uint64_t)(abs(diff_y));
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vld1q_s16(ref + i);
|
||||
const int16x8_t B = vld1q_s16(src + i);
|
||||
const int16x8_t C = vld1q_s16(dst + i);
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_uv
|
||||
const int16x8_t E = vaddq_s16(C, D); // new_uv
|
||||
vst1q_s16(dst + i, E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
|
||||
int len, const uint16_t* best_y,
|
||||
uint16_t* out, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const int16x8_t max = vdupq_n_s16(max_y);
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t a0 = vld1q_s16(A + i + 0);
|
||||
const int16x8_t a1 = vld1q_s16(A + i + 1);
|
||||
const int16x8_t b0 = vld1q_s16(B + i + 0);
|
||||
const int16x8_t b1 = vld1q_s16(B + i + 1);
|
||||
const int16x8_t a0b1 = vaddq_s16(a0, b1);
|
||||
const int16x8_t a1b0 = vaddq_s16(a1, b0);
|
||||
const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
|
||||
const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
|
||||
const int16x8_t e0 = vrhaddq_s16(c1, a0);
|
||||
const int16x8_t e1 = vrhaddq_s16(c0, a1);
|
||||
const int16x8x2_t f = vzipq_s16(e0, e1);
|
||||
const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
|
||||
const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
|
||||
const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
|
||||
const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
|
||||
const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
|
||||
const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
|
||||
vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
|
||||
vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
|
||||
int len, const uint16_t* best_y,
|
||||
uint16_t* out, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const uint16x8_t max = vdupq_n_u16(max_y);
|
||||
for (i = 0; i + 4 <= len; i += 4) {
|
||||
const int16x4_t a0 = vld1_s16(A + i + 0);
|
||||
const int16x4_t a1 = vld1_s16(A + i + 1);
|
||||
const int16x4_t b0 = vld1_s16(B + i + 0);
|
||||
const int16x4_t b1 = vld1_s16(B + i + 1);
|
||||
const int32x4_t a0b1 = vaddl_s16(a0, b1);
|
||||
const int32x4_t a1b0 = vaddl_s16(a1, b0);
|
||||
const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1); // 2*(A0+B1)
|
||||
const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0); // 2*(A1+B0)
|
||||
const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3);
|
||||
const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3);
|
||||
const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0));
|
||||
const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1));
|
||||
const int32x4x2_t f = vzipq_s32(e0, e1);
|
||||
|
||||
const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i));
|
||||
const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g));
|
||||
const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g));
|
||||
const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1));
|
||||
const uint16x8_t i_clamped = vminq_u16(i_16, max);
|
||||
vst1q_u16(out + 2 * i + 0, i_clamped);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth) {
|
||||
if (bit_depth <= 10) {
|
||||
SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
|
||||
} else {
|
||||
SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_NEON;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
extern void InitSharpYuvNEON(void);
|
||||
|
||||
void InitSharpYuvNEON(void) {}
|
||||
|
||||
#endif // WEBP_USE_NEON
|
201
sharpyuv/sharpyuv_sse2.c
Normal file
201
sharpyuv/sharpyuv_sse2.c
Normal file
@ -0,0 +1,201 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// Speed-critical functions for Sharp YUV.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#include "sharpyuv/sharpyuv_dsp.h"
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <stdlib.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
static uint16_t clip_SSE2(int v, int max) {
|
||||
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
uint64_t diff = 0;
|
||||
uint32_t tmp[4];
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max = _mm_set1_epi16(max_y);
|
||||
const __m128i one = _mm_set1_epi16(1);
|
||||
__m128i sum = zero;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_y
|
||||
const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0)
|
||||
const __m128i F = _mm_add_epi16(C, D); // new_y
|
||||
const __m128i G = _mm_or_si128(E, one); // -1 or 1
|
||||
const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
|
||||
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
|
||||
_mm_storeu_si128((__m128i*)(dst + i), H);
|
||||
sum = _mm_add_epi32(sum, I);
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tmp, sum);
|
||||
diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_SSE2(new_y, max_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i = 0;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_uv
|
||||
const __m128i E = _mm_add_epi16(C, D); // new_uv
|
||||
_mm_storeu_si128((__m128i*)(dst + i), E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
|
||||
int len, const uint16_t* best_y,
|
||||
uint16_t* out, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi16(8);
|
||||
const __m128i max = _mm_set1_epi16(max_y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
|
||||
const __m128i a0b1 = _mm_add_epi16(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi16(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi16(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi16(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi16(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi16(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
|
||||
const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
|
||||
const __m128i h0 = _mm_add_epi16(g0, f0);
|
||||
const __m128i h1 = _mm_add_epi16(g1, f1);
|
||||
const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
|
||||
const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
|
||||
return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16);
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
|
||||
int len, const uint16_t* best_y,
|
||||
uint16_t* out, int bit_depth) {
|
||||
const int max_y = (1 << bit_depth) - 1;
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi32(8);
|
||||
const __m128i max = _mm_set1_epi16(max_y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 4 <= len; i += 4) {
|
||||
const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
|
||||
const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
|
||||
const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
|
||||
const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
|
||||
const __m128i a0b1 = _mm_add_epi32(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi32(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi32(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi32(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi32(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi32(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
|
||||
const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
|
||||
const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
|
||||
out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out,
|
||||
int bit_depth) {
|
||||
if (bit_depth <= 10) {
|
||||
SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
|
||||
} else {
|
||||
SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
|
||||
SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
|
||||
SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
|
||||
SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
|
||||
}
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
extern void InitSharpYuvSSE2(void);
|
||||
|
||||
void InitSharpYuvSSE2(void) {}
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
Reference in New Issue
Block a user