/* Copyright (c) 2021, Dominic Szablewski - https://phoboslab.org SPDX-License-Identifier: MIT Simple benchmark suite for png, stbi and qoi Requires libpng, "stb_image.h" and "stb_image_write.h" Compile with: gcc qoibench.c -std=gnu99 -lpng -O3 -o qoibench */ #include #include #include #define STB_IMAGE_IMPLEMENTATION #define STBI_ONLY_PNG #define STBI_NO_LINEAR #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" #define QOI_IMPLEMENTATION #include "qoi.h" // ----------------------------------------------------------------------------- // Cross platform high resolution timer // From https://gist.github.com/ForeverZer0/0a4f80fc02b96e19380ebb7a3debbee5 #include #if defined(__linux) #define HAVE_POSIX_TIMER #include #ifdef CLOCK_MONOTONIC #define CLOCKID CLOCK_MONOTONIC #else #define CLOCKID CLOCK_REALTIME #endif #elif defined(__APPLE__) #define HAVE_MACH_TIMER #include #elif defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include #endif static uint64_t ns() { static uint64_t is_init = 0; #if defined(__APPLE__) static mach_timebase_info_data_t info; if (0 == is_init) { mach_timebase_info(&info); is_init = 1; } uint64_t now; now = mach_absolute_time(); now *= info.numer; now /= info.denom; return now; #elif defined(__linux) static struct timespec linux_rate; if (0 == is_init) { clock_getres(CLOCKID, &linux_rate); is_init = 1; } uint64_t now; struct timespec spec; clock_gettime(CLOCKID, &spec); now = spec.tv_sec * 1.0e9 + spec.tv_nsec; return now; #elif defined(_WIN32) static LARGE_INTEGER win_frequency; if (0 == is_init) { QueryPerformanceFrequency(&win_frequency); is_init = 1; } LARGE_INTEGER now; QueryPerformanceCounter(&now); return (uint64_t) ((1e9 * now.QuadPart) / win_frequency.QuadPart); #endif } #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) #define ERROR(...) printf("abort at line " TOSTRING(__LINE__) ": " __VA_ARGS__); printf("\n"); exit(1) // ----------------------------------------------------------------------------- // libpng encode/decode wrappers // Seriously, who thought this was a good abstraction for an API to read/write // images? typedef struct { int size; int capacity; unsigned char *data; } libpng_write_t; void libpng_encode_callback(png_structp png_ptr, png_bytep data, png_size_t length) { libpng_write_t *write_data = (libpng_write_t*)png_get_io_ptr(png_ptr); if (write_data->size + length >= write_data->capacity) { ERROR("PNG write"); } memcpy(write_data->data + write_data->size, data, length); write_data->size += length; } void *libpng_encode(void *pixels, int w, int h, int channels, int *out_len) { png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); if (!png) { ERROR("png_create_write_struct"); } png_infop info = png_create_info_struct(png); if (!info) { ERROR("png_create_info_struct"); } if (setjmp(png_jmpbuf(png))) { ERROR("png_jmpbuf"); } // Output is 8bit depth, RGBA format. png_set_IHDR( png, info, w, h, 8, channels == 3 ? PNG_COLOR_TYPE_RGB : PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT ); png_bytep row_pointers[h]; for(int y = 0; y < h; y++){ row_pointers[y] = ((unsigned char *)pixels + y * w * channels); } libpng_write_t write_data = { .size = 0, .capacity = w * h * channels, .data = malloc(w * h * channels) }; png_set_rows(png, info, row_pointers); png_set_write_fn(png, &write_data, libpng_encode_callback, NULL); png_write_png(png, info, PNG_TRANSFORM_IDENTITY, NULL); png_destroy_write_struct(&png, &info); *out_len = write_data.size; return write_data.data; } typedef struct { int pos; int size; unsigned char *data; } libpng_read_t; void png_decode_callback(png_structp png, png_bytep data, png_size_t length) { libpng_read_t *read_data = (libpng_read_t*)png_get_io_ptr(png); if (read_data->pos + length > read_data->size) { ERROR("PNG read %ld bytes at pos %d (size: %d)", length, read_data->pos, read_data->size); } memcpy(data, read_data->data + read_data->pos, length); read_data->pos += length; } void png_warning_callback(png_structp png_ptr, png_const_charp warning_msg) { // Ignore warnings about sRGB profiles and such. } void *libpng_decode(void *data, int size, int *out_w, int *out_h) { png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, png_warning_callback); if (!png) { ERROR("png_create_read_struct"); } png_infop info = png_create_info_struct(png); if (!info) { ERROR("png_create_info_struct"); } libpng_read_t read_data = { .pos = 0, .size = size, .data = data }; png_set_read_fn(png, &read_data, png_decode_callback); png_set_sig_bytes(png, 0); png_read_info(png, info); png_uint_32 w, h; int bitDepth, colorType, interlaceType; png_get_IHDR(png, info, &w, &h, &bitDepth, &colorType, &interlaceType, NULL, NULL); // 16 bit -> 8 bit png_set_strip_16(png); // 1, 2, 4 bit -> 8 bit if (bitDepth < 8) { png_set_packing(png); } if (colorType & PNG_COLOR_MASK_PALETTE) { png_set_expand(png); } if (!(colorType & PNG_COLOR_MASK_COLOR)) { png_set_gray_to_rgb(png); } // set paletted or RGB images with transparency to full alpha so we get RGBA if (png_get_valid(png, info, PNG_INFO_tRNS)) { png_set_tRNS_to_alpha(png); } // make sure every pixel has an alpha value if (!(colorType & PNG_COLOR_MASK_ALPHA)) { png_set_filler(png, 255, PNG_FILLER_AFTER); } png_read_update_info(png, info); unsigned char* out = malloc(w * h * 4); *out_w = w; *out_h = h; // png_uint_32 rowBytes = png_get_rowbytes(png, info); png_bytep row_pointers[h]; for (png_uint_32 row = 0; row < h; row++ ) { row_pointers[row] = (png_bytep)(out + (row * w * 4)); } png_read_image(png, row_pointers); png_read_end(png, info); png_destroy_read_struct( &png, &info, NULL); return out; } // ----------------------------------------------------------------------------- // stb_image encode callback void stbi_write_callback(void *context, void *data, int size) { int *encoded_size = (int *)context; *encoded_size += size; // In theory we'd need to do another malloc(), memcpy() and free() here to // be fair to the other decode functions... } // ----------------------------------------------------------------------------- // function to load a whole file into memory void *fload(const char *path, int *out_size) { FILE *fh = fopen(path, "rb"); if (!fh) { ERROR("Can't open file"); } fseek(fh, 0, SEEK_END); int size = ftell(fh); fseek(fh, 0, SEEK_SET); void *buffer = malloc(size); if (!buffer) { ERROR("Malloc for %d bytes failed", size); } if (!fread(buffer, size, 1, fh)) { ERROR("Can't read file %s", path); } fclose(fh); *out_size = size; return buffer; } // ----------------------------------------------------------------------------- // benchmark runner int opt_runs = 1; int opt_nopng = 0; int opt_nowarmup = 0; int opt_noverify = 0; int opt_nodecode = 0; int opt_noencode = 0; int opt_norecurse = 0; int opt_onlytotals = 0; enum { LIBPNG, STBI, QOI, BENCH_COUNT /* must be the last element */ }; static const char *const lib_names[BENCH_COUNT] = { // NOTE: pad with spaces so everything lines up properly [LIBPNG] = "libpng: ", [STBI] = "stbi: ", [QOI] = "qoi: ", }; typedef struct { uint64_t size; uint64_t encode_time; uint64_t decode_time; } benchmark_lib_result_t; typedef struct { int count; uint64_t raw_size; uint64_t px; int w; int h; benchmark_lib_result_t libs[BENCH_COUNT]; } benchmark_result_t; void benchmark_print_result(benchmark_result_t res) { res.px /= res.count; res.raw_size /= res.count; double px = res.px; printf(" decode ms encode ms decode mpps encode mpps size kb rate\n"); for (int i = 0; i < BENCH_COUNT; ++i) { if (opt_nopng && (i == LIBPNG || i == STBI)) { continue; } res.libs[i].encode_time /= res.count; res.libs[i].decode_time /= res.count; res.libs[i].size /= res.count; printf( "%s %8.1f %8.1f %8.2f %8.2f %8ld %4.1f%%\n", lib_names[i], (double)res.libs[i].decode_time/1000000.0, (double)res.libs[i].encode_time/1000000.0, (res.libs[i].decode_time > 0 ? px / ((double)res.libs[i].decode_time/1000.0) : 0), (res.libs[i].encode_time > 0 ? px / ((double)res.libs[i].encode_time/1000.0) : 0), res.libs[i].size/1024, ((double)res.libs[i].size/(double)res.raw_size) * 100.0 ); } printf("\n"); } // Run __VA_ARGS__ a number of times and measure the time taken. The first // run is ignored. #define BENCHMARK_FN(NOWARMUP, RUNS, AVG_TIME, ...) \ do { \ uint64_t time = 0; \ for (int i = NOWARMUP; i <= RUNS; i++) { \ uint64_t time_start = ns(); \ __VA_ARGS__ \ uint64_t time_end = ns(); \ if (i > 0) { \ time += time_end - time_start; \ } \ } \ AVG_TIME = time / RUNS; \ } while (0) benchmark_result_t benchmark_image(const char *path) { int encoded_png_size; int encoded_qoi_size; int w; int h; int channels; // Load the encoded PNG, encoded QOI and raw pixels into memory if(!stbi_info(path, &w, &h, &channels)) { ERROR("Error decoding header %s", path); } if (channels != 3) { channels = 4; } void *pixels = (void *)stbi_load(path, &w, &h, NULL, channels); void *encoded_png = fload(path, &encoded_png_size); void *encoded_qoi = qoi_encode(pixels, &(qoi_desc){ .width = w, .height = h, .channels = channels, .colorspace = QOI_SRGB }, &encoded_qoi_size); if (!pixels || !encoded_qoi || !encoded_png) { ERROR("Error encoding %s", path); } // Verify QOI Output if (!opt_noverify) { qoi_desc dc; void *pixels_qoi = qoi_decode(encoded_qoi, encoded_qoi_size, &dc, channels); if (memcmp(pixels, pixels_qoi, w * h * channels) != 0) { ERROR("QOI roundtrip pixel mismatch for %s", path); } free(pixels_qoi); } benchmark_result_t res = {0}; res.count = 1; res.raw_size = w * h * channels; res.px = w * h; res.w = w; res.h = h; // Decoding if (!opt_nodecode) { if (!opt_nopng) { BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[LIBPNG].decode_time, { int dec_w, dec_h; void *dec_p = libpng_decode(encoded_png, encoded_png_size, &dec_w, &dec_h); free(dec_p); }); BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[STBI].decode_time, { int dec_w, dec_h, dec_channels; void *dec_p = stbi_load_from_memory(encoded_png, encoded_png_size, &dec_w, &dec_h, &dec_channels, 4); free(dec_p); }); } BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[QOI].decode_time, { qoi_desc desc; void *dec_p = qoi_decode(encoded_qoi, encoded_qoi_size, &desc, 4); free(dec_p); }); } // Encoding if (!opt_noencode) { if (!opt_nopng) { BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[LIBPNG].encode_time, { int enc_size; void *enc_p = libpng_encode(pixels, w, h, channels, &enc_size); res.libs[LIBPNG].size = enc_size; free(enc_p); }); BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[STBI].encode_time, { int enc_size = 0; stbi_write_png_to_func(stbi_write_callback, &enc_size, w, h, channels, pixels, 0); res.libs[STBI].size = enc_size; }); } BENCHMARK_FN(opt_nowarmup, opt_runs, res.libs[QOI].encode_time, { int enc_size; void *enc_p = qoi_encode(pixels, &(qoi_desc){ .width = w, .height = h, .channels = channels, .colorspace = QOI_SRGB }, &enc_size); res.libs[QOI].size = enc_size; free(enc_p); }); } free(pixels); free(encoded_png); free(encoded_qoi); return res; } void benchmark_directory(const char *path, benchmark_result_t *grand_total) { DIR *dir = opendir(path); if (!dir) { ERROR("Couldn't open directory %s", path); } struct dirent *file; if (!opt_norecurse) { for (int i = 0; (file = readdir(dir)) != NULL; i++) { if ( file->d_type & DT_DIR && strcmp(file->d_name, ".") != 0 && strcmp(file->d_name, "..") != 0 ) { char subpath[1024]; snprintf(subpath, 1024, "%s/%s", path, file->d_name); benchmark_directory(subpath, grand_total); } } rewinddir(dir); } benchmark_result_t dir_total = {0}; int has_shown_head = 0; for (int i = 0; (file = readdir(dir)) != NULL; i++) { if (strcmp(file->d_name + strlen(file->d_name) - 4, ".png") != 0) { continue; } if (!has_shown_head) { has_shown_head = 1; printf("## Benchmarking %s/*.png -- %d runs\n\n", path, opt_runs); } char *file_path = malloc(strlen(file->d_name) + strlen(path)+8); sprintf(file_path, "%s/%s", path, file->d_name); benchmark_result_t res = benchmark_image(file_path); if (!opt_onlytotals) { printf("## %s size: %dx%d\n", file_path, res.w, res.h); benchmark_print_result(res); } free(file_path); dir_total.count++; dir_total.raw_size += res.raw_size; dir_total.px += res.px; for (int i = 0; i < BENCH_COUNT; ++i) { dir_total.libs[i].encode_time += res.libs[i].encode_time; dir_total.libs[i].decode_time += res.libs[i].decode_time; dir_total.libs[i].size += res.libs[i].size; } grand_total->count++; grand_total->raw_size += res.raw_size; grand_total->px += res.px; for (int i = 0; i < BENCH_COUNT; ++i) { grand_total->libs[i].encode_time += res.libs[i].encode_time; grand_total->libs[i].decode_time += res.libs[i].decode_time; grand_total->libs[i].size += res.libs[i].size; } } closedir(dir); if (dir_total.count > 0) { printf("## Total for %s\n", path); benchmark_print_result(dir_total); } } int main(int argc, char **argv) { if (argc < 3) { printf("Usage: qoibench [options]\n"); printf("Options:\n"); printf(" --nowarmup ... don't perform a warmup run\n"); printf(" --nopng ...... don't run png encode/decode\n"); printf(" --noverify ... don't verify qoi roundtrip\n"); printf(" --noencode ... don't run encoders\n"); printf(" --nodecode ... don't run decoders\n"); printf(" --norecurse .. don't descend into directories\n"); printf(" --onlytotals . don't print individual image results\n"); printf("Examples\n"); printf(" qoibench 10 images/textures/\n"); printf(" qoibench 1 images/textures/ --nopng --nowarmup\n"); exit(1); } for (int i = 3; i < argc; i++) { if (strcmp(argv[i], "--nowarmup") == 0) { opt_nowarmup = 1; } else if (strcmp(argv[i], "--nopng") == 0) { opt_nopng = 1; } else if (strcmp(argv[i], "--noverify") == 0) { opt_noverify = 1; } else if (strcmp(argv[i], "--noencode") == 0) { opt_noencode = 1; } else if (strcmp(argv[i], "--nodecode") == 0) { opt_nodecode = 1; } else if (strcmp(argv[i], "--norecurse") == 0) { opt_norecurse = 1; } else if (strcmp(argv[i], "--onlytotals") == 0) { opt_onlytotals = 1; } else { ERROR("Unknown option %s", argv[i]); } } opt_runs = atoi(argv[1]); if (opt_runs <=0) { ERROR("Invalid number of runs %d", opt_runs); } benchmark_result_t grand_total = {0}; benchmark_directory(argv[2], &grand_total); if (grand_total.count > 0) { printf("# Grand total for %s\n", argv[2]); benchmark_print_result(grand_total); } else { printf("No images found in %s\n", argv[2]); } return 0; }