#ifdefined(__clang__) // That #include <immintrin.h> is usually enough, but Clang's headers // "helpfully" skip including the whole kitchen sink when _MSC_VER is // defined, because lots of programs on Windows would include that and // it'd be a lot slower. But we want all those headers included so we // can use their features after runtime checks later. #include <smmintrin.h> #include <avxintrin.h> #include <avx2intrin.h> #include <avx512fintrin.h> #include <avx512dqintrin.h> #endif #endif
staticfloat log2f_(float x) { // The first approximation of log2(x) is its exponent 'e', minus 127.
int32_t bits;
memcpy(&bits, &x, sizeof(bits));
float e = (float)bits * (1.0f / (1<<23));
// If we use the mantissa too we can refine the error signficantly.
int32_t m_bits = (bits & 0x007fffff) | 0x3f000000; float m;
memcpy(&m, &m_bits, sizeof(m));
// Before we cast fbits to int32_t, check for out of range values to pacify UBSAN. // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite. // Negative values are effectively underflow - we'll end up returning a (different) negative // value, which makes no sense. So clamp to zero. if (fbits >= (float)INT_MAX) { return INFINITY_;
} elseif (fbits < 0) { return 0;
}
// Not static, as it's used by some test tools. float powf_(float x, float y) { if (x <= 0.f) { return 0.f;
} if (x == 1.f) { return 1.f;
} return exp2f_(log2f_(x) * y);
}
// Most transfer functions we work with are sRGBish. // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense, // and repurpose the other fields to hold the parameters of the HDR functions. struct TF_PQish { float A,B,C,D,E,F; }; struct TF_HLGish { float R,G,a,b,c,K_minus_1; }; // We didn't originally support a scale factor K for HLG, and instead just stored 0 in // the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions. // By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
staticfloat TFKind_marker(skcms_TFType kind) { // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM. return -(float)kind;
}
static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
, TF_HLGish* hlg = nullptr) { if (tf.g < 0) { // Negative "g" is mapped to enum values; large negative are for sure invalid. if (tf.g < -128) { return skcms_TFType_Invalid;
} int enum_g = -static_cast<int>(tf.g); // Non-whole "g" values are invalid as well. if (static_cast<float>(-enum_g) != tf.g) { return skcms_TFType_Invalid;
} // TODO: soundness checks for PQ/HLG like we do for sRGBish? switch (enum_g) { case skcms_TFType_PQish: if (pq) {
memcpy(pq , &tf.a, sizeof(*pq ));
} return skcms_TFType_PQish; case skcms_TFType_HLGish: if (hlg) {
memcpy(hlg, &tf.a, sizeof(*hlg));
} return skcms_TFType_HLGish; case skcms_TFType_HLGinvish: if (hlg) {
memcpy(hlg, &tf.a, sizeof(*hlg));
} return skcms_TFType_HLGinvish;
} return skcms_TFType_Invalid;
}
// Basic soundness checks for sRGBish transfer functions. if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g) // a,c,d,g should be non-negative to make any sense.
&& tf.a >= 0
&& tf.c >= 0
&& tf.d >= 0
&& tf.g >= 0 // Raising a negative value to a fractional tf->g produces complex numbers.
&& tf.a * tf.d + tf.b >= 0) { return skcms_TFType_sRGBish;
}
case skcms_TFType_HLGish: { constfloat K = hlg.K_minus_1 + 1.0f; return K * sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
: expf_((x-hlg.c)*hlg.a) + hlg.b);
}
// skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast. case skcms_TFType_HLGinvish: { constfloat K = hlg.K_minus_1 + 1.0f;
x /= K; return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
: hlg.a * logf_(x - hlg.b) + hlg.c);
}
case skcms_TFType_sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
: powf_(tf->a * x + tf->b, tf->g) + tf->e);
float ix = fmaxf_(0, fminf_(x, 1)) * static_cast<float>(curve->table_entries - 1); int lo = (int) ix ,
hi = (int)(float)minus_1_ulp(ix + 1.0f); float t = ix - (float)lo;
// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid // use of the type is for the CHAD tag that stores exactly nine values. typedefstruct {
uint8_t type [ 4];
uint8_t reserved [ 4];
uint8_t values [36];
} sf32_Layout;
bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
skcms_ICCTag tag; if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) { returnfalse;
}
const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf; const uint8_t* values = sf32Tag->values; for (int r = 0; r < 3; ++r) for (int c = 0; c < 3; ++c, values += 4) {
m->vals[r][c] = read_big_fixed(values);
} returntrue;
}
// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of // the type are for tags/data that store exactly one triple. typedefstruct {
uint8_t type [4];
uint8_t reserved [4];
uint8_t X [4];
uint8_t Y [4];
uint8_t Z [4];
} XYZ_Layout;
staticint data_color_space_channel_count(uint32_t data_color_space) { switch (data_color_space) { case skcms_Signature_CMYK: return 4; case skcms_Signature_Gray: return 1; case skcms_Signature_RGB: return 3; case skcms_Signature_Lab: return 3; case skcms_Signature_XYZ: return 3; case skcms_Signature_CIELUV: return 3; case skcms_Signature_YCbCr: return 3; case skcms_Signature_CIEYxy: return 3; case skcms_Signature_HSV: return 3; case skcms_Signature_HLS: return 3; case skcms_Signature_CMY: return 3; case skcms_Signature_2CLR: return 2; case skcms_Signature_3CLR: return 3; case skcms_Signature_4CLR: return 4; case skcms_Signature_5CLR: return 5; case skcms_Signature_6CLR: return 6; case skcms_Signature_7CLR: return 7; case skcms_Signature_8CLR: return 8; case skcms_Signature_9CLR: return 9; case skcms_Signature_10CLR: return 10; case skcms_Signature_11CLR: return 11; case skcms_Signature_12CLR: return 12; case skcms_Signature_13CLR: return 13; case skcms_Signature_14CLR: return 14; case skcms_Signature_15CLR: return 15; default: return -1;
}
}
int skcms_GetInputChannelCount(const skcms_ICCProfile* profile) { int a2b_count = 0; if (profile->has_A2B) {
a2b_count = profile->A2B.input_channels != 0
? static_cast<int>(profile->A2B.input_channels)
: 3;
}
if (value_count < 2) {
curve->table_entries = 0;
curve->parametric.a = 1.0f;
curve->parametric.b = 0.0f;
curve->parametric.c = 0.0f;
curve->parametric.d = 0.0f;
curve->parametric.e = 0.0f;
curve->parametric.f = 0.0f; if (value_count == 0) { // Empty tables are a shorthand for an identity curve
curve->parametric.g = 1.0f;
} else { // Single entry tables are a shorthand for simple gamma
curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
}
} else {
curve->table_8 = nullptr;
curve->table_16 = curvTag->variable;
curve->table_entries = value_count;
}
returntrue;
}
// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read. // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size). staticbool read_curve(const uint8_t* buf, uint32_t size,
skcms_Curve* curve, uint32_t* curve_size) { if (!buf || size < 4 || !curve) { returnfalse;
}
staticbool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) { // MFT matrices are applied before the first set of curves, but must be identity unless the // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another // field/flag.
a2b->matrix_channels = 0;
a2b-> input_channels = mftTag-> input_channels[0];
a2b->output_channels = mftTag->output_channels[0];
// We require exactly three (ie XYZ/Lab/RGB) output channels if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { returnfalse;
} // We require at least one, and no more than four (ie CMYK) input channels if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { returnfalse;
}
for (uint32_t i = 0; i < a2b->input_channels; ++i) {
a2b->grid_points[i] = mftTag->grid_points[0];
} // The grid only makes sense with at least two points along each axis if (a2b->grid_points[0] < 2) { returnfalse;
} returntrue;
}
// All as the A2B version above, except where noted. staticbool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) { // Same as A2B.
b2a->matrix_channels = 0;
b2a-> input_channels = mftTag-> input_channels[0];
b2a->output_channels = mftTag->output_channels[0];
// For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels. if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) { returnfalse;
} if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) { returnfalse;
}
// Same as A2B. for (uint32_t i = 0; i < b2a->input_channels; ++i) {
b2a->grid_points[i] = mftTag->grid_points[0];
} if (b2a->grid_points[0] < 2) { returnfalse;
} returntrue;
}
template <typename A2B_or_B2A> staticbool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
uint32_t input_table_entries, uint32_t output_table_entries,
A2B_or_B2A* out) { // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
uint32_t byte_len_per_input_table = input_table_entries * byte_width;
uint32_t byte_len_per_output_table = output_table_entries * byte_width;
// [input|output]_channels are <= 4, so still no overflow
uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
// We require exactly three (ie XYZ/Lab/RGB) output channels if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { returnfalse;
} // We require no more than four (ie CMYK) input channels if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { returnfalse;
}
// "B" curves must be present if (0 == b_curve_offset) { returnfalse;
}
if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
a2b->output_curves)) { returnfalse;
}
// "M" curves and Matrix must be used together if (0 != m_curve_offset) { if (0 == matrix_offset) { returnfalse;
}
a2b->matrix_channels = a2b->output_channels; if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
a2b->matrix_curves)) { returnfalse;
}
// "A" curves and CLUT must be used together if (0 != a_curve_offset) { if (0 == clut_offset) { returnfalse;
} if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
a2b->input_curves)) { returnfalse;
}
uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload for (uint32_t i = 0; i < a2b->input_channels; ++i) {
a2b->grid_points[i] = clut->grid_points[i]; // The grid only makes sense with at least two points along each axis if (a2b->grid_points[i] < 2) { returnfalse;
}
grid_size *= a2b->grid_points[i];
} if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) { returnfalse;
}
} else { if (0 != clut_offset) { returnfalse;
}
// If there is no CLUT, the number of input and output channels must match if (a2b->input_channels != a2b->output_channels) { returnfalse;
}
// Zero out the number of input channels to signal that we're skipping this stage
a2b->input_channels = 0;
}
returntrue;
}
// Exactly the same as read_tag_mab(), except where there are comments. // TODO: refactor the two to eliminate common code? staticbool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) { if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) { returnfalse;
}
// "B" curves are our inputs, not outputs. if (!read_curves(tag->buf, tag->size, b_curve_offset, b2a->input_channels,
b2a->input_curves)) { returnfalse;
}
if (0 != m_curve_offset) { if (0 == matrix_offset) { returnfalse;
} // Matrix channels is tied to input_channels (3), not output_channels.
b2a->matrix_channels = b2a->input_channels;
if (!read_curves(tag->buf, tag->size, m_curve_offset, b2a->matrix_channels,
b2a->matrix_curves)) { returnfalse;
}
if (0 != a_curve_offset) { if (0 == clut_offset) { returnfalse;
}
// "A" curves are our output, not input. if (!read_curves(tag->buf, tag->size, a_curve_offset, b2a->output_channels,
b2a->output_curves)) { returnfalse;
}
uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0]; for (uint32_t i = 0; i < b2a->input_channels; ++i) {
b2a->grid_points[i] = clut->grid_points[i]; if (b2a->grid_points[i] < 2) { returnfalse;
}
grid_size *= b2a->grid_points[i];
} if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) { returnfalse;
}
} else { if (0 != clut_offset) { returnfalse;
}
if (b2a->input_channels != b2a->output_channels) { returnfalse;
}
// Zero out *output* channels to skip this stage.
b2a->output_channels = 0;
} returntrue;
}
// If you pass f, we'll fit a possibly-non-zero value for *f. // If you pass nullptr, we'll assume you want *f to be treated as zero. staticint fit_linear(const skcms_Curve* curve, int N, float tol, float* c, float* d, float* f = nullptr) {
assert(N > 1); // We iteratively fit the first points to the TF's linear piece. // We want the cx + f line to pass through the first and last points we fit exactly. // // As we walk along the points we find the minimum and maximum slope of the line before the // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes // emtpy, when we definitely can't add any more points. // // Some points' error intervals may intersect the running interval but not lie fully // within it. So we keep track of the last point we saw that is a valid end point candidate, // and once the search is done, back up to build the line through *that* point. constfloat dx = 1.0f / static_cast<float>(N - 1);
int lin_points = 1;
float f_zero = 0.0f; if (f) {
*f = eval_curve(curve, 0);
} else {
f = &f_zero;
}
float slope_min = -INFINITY_; float slope_max = +INFINITY_; for (int i = 1; i < N; ++i) { float x = static_cast<float>(i) * dx; float y = eval_curve(curve, x);
float slope_max_i = (y + tol - *f) / x,
slope_min_i = (y - tol - *f) / x; if (slope_max_i < slope_min || slope_max < slope_min_i) { // Slope intervals would no longer overlap. break;
}
slope_max = fminf_(slope_max, slope_max_i);
slope_min = fmaxf_(slope_min, slope_min_i);
// Set D to the last point that met our tolerance.
*d = static_cast<float>(lin_points - 1) * dx; return lin_points;
}
// If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction. staticvoid canonicalize_identity(skcms_Curve* curve) { if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) { int N = (int)curve->table_entries;
float c = 0.0f, d = 0.0f, f = 0.0f; if (N == fit_linear(curve, N, 1.0f/static_cast<float>(2*N), &c,&d,&f)
&& c == 1.0f
&& f == 0.0f) {
curve->table_entries = 0;
curve->table_8 = nullptr;
curve->table_16 = nullptr;
curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
}
}
}
staticbool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) { bool ok = false; if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, a2b); } if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, a2b); } if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); } if (!ok) { returnfalse;
}
if (a2b->input_channels > 0) { canonicalize_identity(a2b->input_curves + 0); } if (a2b->input_channels > 1) { canonicalize_identity(a2b->input_curves + 1); } if (a2b->input_channels > 2) { canonicalize_identity(a2b->input_curves + 2); } if (a2b->input_channels > 3) { canonicalize_identity(a2b->input_curves + 3); }
if (a2b->matrix_channels > 0) { canonicalize_identity(a2b->matrix_curves + 0); } if (a2b->matrix_channels > 1) { canonicalize_identity(a2b->matrix_curves + 1); } if (a2b->matrix_channels > 2) { canonicalize_identity(a2b->matrix_curves + 2); }
if (a2b->output_channels > 0) { canonicalize_identity(a2b->output_curves + 0); } if (a2b->output_channels > 1) { canonicalize_identity(a2b->output_curves + 1); } if (a2b->output_channels > 2) { canonicalize_identity(a2b->output_curves + 2); }
returntrue;
}
staticbool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) { bool ok = false; if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, b2a); } if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, b2a); } if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); } if (!ok) { returnfalse;
}
if (b2a->input_channels > 0) { canonicalize_identity(b2a->input_curves + 0); } if (b2a->input_channels > 1) { canonicalize_identity(b2a->input_curves + 1); } if (b2a->input_channels > 2) { canonicalize_identity(b2a->input_curves + 2); }
if (b2a->matrix_channels > 0) { canonicalize_identity(b2a->matrix_curves + 0); } if (b2a->matrix_channels > 1) { canonicalize_identity(b2a->matrix_curves + 1); } if (b2a->matrix_channels > 2) { canonicalize_identity(b2a->matrix_curves + 2); }
if (b2a->output_channels > 0) { canonicalize_identity(b2a->output_curves + 0); } if (b2a->output_channels > 1) { canonicalize_identity(b2a->output_curves + 1); } if (b2a->output_channels > 2) { canonicalize_identity(b2a->output_curves + 2); } if (b2a->output_channels > 3) { canonicalize_identity(b2a->output_curves + 3); }
// Validate signature, size (smaller than buffer, large enough to hold tag table), // and major version
uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout); if (signature != skcms_Signature_acsp ||
profile->size > len ||
profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
(version >> 24) > 4) { returnfalse;
}
// Validate that illuminant is D50 white if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
fabsf_(illuminant_Z - 0.8249f) > 0.0100f) { returnfalse;
}
// Validate that all tag entries have sane offset + size const tag_Layout* tags = get_tag_table(profile); for (uint32_t i = 0; i < profile->tag_count; ++i) {
uint32_t tag_offset = read_big_u32(tags[i].offset);
uint32_t tag_size = read_big_u32(tags[i].size);
uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size; if (tag_size < 4 || tag_end > profile->size) { returnfalse;
}
}
0, // size, moot here
skcms_Signature_RGB, // data_color_space
skcms_Signature_XYZ, // pcs
0, // tag count, moot here
// We choose to represent sRGB with its canonical transfer function, // and with its canonical XYZD50 gamut matrix. true, // has_trc, followed by the 3 trc curves
{
{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
},
false, // has_B2A, followed by B2A itself, which we also don't care about.
{
0,
{
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
},
false, // has_CICP, followed by cicp itself which we don't care about.
{ 0, 0, 0, 0 },
}; return &sRGB_profile;
}
const skcms_ICCProfile* skcms_XYZD50_profile() { // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix. staticconst skcms_ICCProfile XYZD50_profile = {
nullptr, // buffer, moot here
0, // size, moot here
skcms_Signature_RGB, // data_color_space
skcms_Signature_XYZ, // pcs
0, // tag count, moot here
true, // has_trc, followed by the 3 trc curves
{
{{0, {1,1, 0,0,0,0,0}}},
{{0, {1,1, 0,0,0,0,0}}},
{{0, {1,1, 0,0,0,0,0}}},
},
true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
{{
{ 1,0,0 },
{ 0,1,0 },
{ 0,0,1 },
}},
false, // has_A2B, followed by A2B itself, which we don't care about.
{
0,
{
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
},
{0,0,0,0},
nullptr,
nullptr,
false, // has_B2A, followed by B2A itself, which we also don't care about.
{
0,
{
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
{{0, {0,0, 0,0,0,0,0}}},
},
bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) { // Test for exactly equal profiles first. if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) { returntrue;
}
// For now this is the essentially the same strategy we use in test_only.c // for our skcms_Transform() smoke tests: // 1) transform A to XYZD50 // 2) transform B to XYZD50 // 3) return true if they're similar enough // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
// skcms_252_random_bytes are 252 of a random shuffle of all possible bytes. // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
// We want to allow otherwise equivalent profiles tagged as grayscale and RGB // to be treated as equal. But CMYK profiles are a totally different ballgame. constauto CMYK = skcms_Signature_CMYK; if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) { returnfalse;
}
// Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK. // TODO: working with RGBA_8888 either way is probably fastest.
skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
size_t npixels = 84; if (A->data_color_space == skcms_Signature_CMYK) {
fmt = skcms_PixelFormat_RGBA_8888;
npixels = 63;
}
// TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile), // use pre-canned results and skip that skcms_Transform() call?
uint8_t dstA[252],
dstB[252]; if (!skcms_Transform(
skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
npixels)) { returnfalse;
} if (!skcms_Transform(
skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
npixels)) { returnfalse;
}
// TODO: make sure this final check has reasonable codegen. for (size_t i = 0; i < 252; i++) { if (abs((int)dstA[i] - (int)dstB[i]) > 1) { returnfalse;
}
} returntrue;
}
// Assumes that Y is 1.0f.
skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
// Now convert toXYZ matrix to toXYZD50.
skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
// Calculate the chromatic adaptation matrix. We will use the Bradford method, thus // the matrices below. The Bradford method is used by Adobe and is widely considered // to be the best.
skcms_Matrix3x3 xyz_to_lms = {{
{ 0.8951f, 0.2664f, -0.1614f },
{ -0.7502f, 1.7135f, 0.0367f },
{ 0.0389f, -0.0685f, 1.0296f },
}};
skcms_Matrix3x3 lms_to_xyz = {{
{ 0.9869929f, -0.1470543f, 0.1599627f },
{ 0.4323053f, 0.5183603f, 0.0492912f },
{ -0.0085287f, 0.0400428f, 0.9684867f },
}};
// We're inverting this function, solving for x in terms of y. // y = (cx + f) x < d // (ax + b)^g + e x ≥ d // The inverse of this function can be expressed in the same piecewise form.
skcms_TransferFunction inv = {0,0,0,0,0,0,0};
// We'll start by finding the new threshold inv.d. // In principle we should be able to find that by solving for y at x=d from either side. // (If those two d values aren't the same, it's a discontinuous transfer function.) float d_l = src->c * src->d + src->f,
d_r = powf_(src->a * src->d + src->b, src->g) + src->e; if (fabsf_(d_l - d_r) > 1/512.0f) { returnfalse;
}
inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
// When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case. if (inv.d > 0) { // Inverting the linear section is pretty straightfoward: // y = cx + f // y - f = cx // (1/c)y - f/c = x
inv.c = 1.0f/src->c;
inv.f = -src->f/src->c;
}
// The interesting part is inverting the nonlinear section: // y = (ax + b)^g + e. // y - e = (ax + b)^g // (y - e)^1/g = ax + b // (y - e)^1/g - b = ax // (1/a)(y - e)^1/g - b/a = x // // To make that fit our form, we need to move the (1/a) term inside the exponentiation: // let k = (1/a)^g // (1/a)( y - e)^1/g - b/a = x // (ky - ke)^1/g - b/a = x
// We need to enforce the same constraints here that we do when fitting a curve, // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true // of the source function if we're here.
// Just like when fitting the curve, there's really no way to rescue a < 0. if (inv.a < 0) { returnfalse;
} // On the other hand we can rescue an ad+b that's gone slightly negative here. if (inv.a * inv.d + inv.b < 0) {
inv.b = -inv.a * inv.d;
}
// That should usually make classify(inv) == sRGBish true, but there are a couple situations // where we might still fail here, like non-finite parameter values. if (classify(inv) != skcms_TFType_sRGBish) { returnfalse;
}
// Now in principle we're done. // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak // e or f of the inverse, depending on which segment contains src(1.0f). float s = skcms_TransferFunction_eval(src, 1.0f); if (!isfinitef_(s)) { returnfalse;
}
float sign = s < 0 ? -1.0f : 1.0f;
s *= sign; if (s < inv.d) {
inv.f = 1.0f - sign * inv.c * s;
} else {
inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
}
// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}: // // tf(x) = cx + f x < d // tf(x) = (ax + b)^g + e x ≥ d // // When fitting, we add the additional constraint that both pieces meet at d: // // cd + f = (ad + b)^g + e // // Solving for e and folding it through gives an alternate formulation of the non-linear piece: // // tf(x) = cx + f x < d // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d // // Our overall strategy is then: // For a couple tolerances, // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows // - invert c,d,f // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f // (and by constraint, inverted e) to the inverse of the table. // Return the parameters with least maximum error. // // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals // of round-trip f_inv(x), the inverse of the non-linear piece of f(x). // // let y = Table(x) // r(x) = x - f_inv(y) // // ∂r/∂g = ln(ay + b)*(ay + b)^g // - ln(ad + b)*(ad + b)^g // ∂r/∂a = yg(ay + b)^(g-1) // - dg(ad + b)^(g-1) // ∂r/∂b = g(ay + b)^(g-1) // - g(ad + b)^(g-1)
// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P, // and fill out the gradient of the residual into dfdP. staticfloat rg_nonlinear(float x, const skcms_Curve* curve, const skcms_TransferFunction* tf, float dfdP[3]) { constfloat y = eval_curve(curve, x);
constfloat g = tf->g, a = tf->a, b = tf->b,
c = tf->c, d = tf->d, f = tf->f;
constfloat Y = fmaxf_(a*y + b, 0.0f),
D = a*d + b;
assert (D >= 0);
// The residual. constfloat f_inv = powf_(Y, g)
- powf_(D, g)
+ c*d + f; return x - f_inv;
}
staticbool gauss_newton_step(const skcms_Curve* curve,
skcms_TransferFunction* tf, float x0, float dx, int N) { // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing. // // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting). // // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P), // where r(P) is the residual vector // and Jf is the Jacobian matrix of f(), ∂r/∂P. // // Let's review the shape of each of these expressions: // r(P) is [N x 1], a column vector with one entry per value of x tested // Jf is [N x 3], a matrix with an entry for each (x,P) pair // Jf^T is [3 x N], the transpose of Jf // // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix, // and so is its inverse (Jf^T Jf)^-1 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P // // Our implementation strategy to get to the final ∆P is // 1) evaluate Jf^T Jf, call that lhs // 2) evaluate Jf^T r(P), call that rhs // 3) invert lhs // 4) multiply inverse lhs by rhs // // This is a friendly implementation strategy because we don't have to have any // buffers that scale with N, and equally nice don't have to perform any matrix // operations that are variable size. // // Other implementation strategies could trade this off, e.g. evaluating the // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by // the residuals. That would probably require implementing singular value // decomposition, and would create a [3 x N] matrix to be multiplied by the // [N x 1] residual vector, but on the upside I think that'd eliminate the // possibility of this gauss_newton_step() function ever failing.
// 0) start off with lhs and rhs safely zeroed.
skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
skcms_Vector3 rhs = { {0,0,0} };
// 1,2) evaluate lhs and evaluate rhs // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T, // so we'll have to update lhs and rhs at the same time. for (int i = 0; i < N; i++) { float x = x0 + static_cast<float>(i)*dx;
for (int r = 0; r < 3; r++) { for (int c = 0; c < 3; c++) {
lhs.vals[r][c] += dfdP[r] * dfdP[c];
}
rhs.vals[r] += dfdP[r] * resid;
}
}
// If any of the 3 P parameters are unused, this matrix will be singular. // Detect those cases and fix them up to indentity instead, so we can invert. for (int k = 0; k < 3; k++) { if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
lhs.vals[k][k] = 1;
}
}
// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't. staticbool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) { // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization. auto fixup_tf = [tf]() { // a must be non-negative. That ensures the function is monotonically increasing. // We don't really know how to fix up a if it goes negative. if (tf->a < 0) { returnfalse;
} // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf. // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case. if (tf->a * tf->d + tf->b < 0) {
tf->b = -tf->a * tf->d;
}
assert (tf->a >= 0 &&
tf->a * tf->d + tf->b >= 0);
// cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free // parameter so we can guarantee this.
tf->e = tf->c*tf->d + tf->f
- powf_(tf->a*tf->d + tf->b, tf->g);
return isfinitef_(tf->e);
};
if (!fixup_tf()) { returnfalse;
}
// No matter where we start, dx should always represent N even steps from 0 to 1. constfloat dx = 1.0f / static_cast<float>(N-1);
// Need this or several curves get worse... *sigh* float init_error = max_roundtrip_error_checked(curve, tf); if (init_error < best_max_error) {
best_max_error = init_error;
best_tf = *tf;
}
// As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2. for (int j = 0; j < 8; j++) { if (!gauss_newton_step(curve, tf, static_cast<float>(L)*dx, dx, N-L) || !fixup_tf()) {
*tf = best_tf; return isfinitef_(best_max_error);
}
if (curve->table_entries == 0) { // No point approximating an skcms_TransferFunction with an skcms_TransferFunction! returnfalse;
}
if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) { // We need at least two points, and must put some reasonable cap on the maximum number. returnfalse;
}
int N = (int)curve->table_entries; constfloat dx = 1.0f / static_cast<float>(N - 1);
*max_error = INFINITY_; constfloat kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f }; for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
skcms_TransferFunction tf,
tf_inv;
// It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
tf.f = 0.0f; int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
if (L == N) { // If the entire data set was linear, move the coefficients to the nonlinear portion // with G == 1. This lets use a canonical representation with d == 0.
tf.g = 1;
tf.a = tf.c;
tf.b = tf.f;
tf.c = tf.d = tf.e = tf.f = 0;
} elseif (L == N - 1) { // Degenerate case with only two points in the nonlinear segment. Solve directly.
tf.g = 1;
tf.a = (eval_curve(curve, static_cast<float>(N-1)*dx) -
eval_curve(curve, static_cast<float>(N-2)*dx))
/ dx;
tf.b = eval_curve(curve, static_cast<float>(N-2)*dx)
- tf.a * static_cast<float>(N-2)*dx;
tf.e = 0;
} else { // Start by guessing a gamma-only curve through the midpoint. int mid = (L + N) / 2; float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1); float mid_y = eval_curve(curve, mid_x);
tf.g = log2f_(mid_y) / log2f_(mid_x);
tf.a = 1;
tf.b = 0;
tf.e = tf.c*tf.d + tf.f
- powf_(tf.a*tf.d + tf.b, tf.g);
if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
!fit_nonlinear(curve, L,N, &tf_inv)) { continue;
}
// We fit tf_inv, so calculate tf to keep in sync. // fit_nonlinear() should guarantee invertibility. if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
assert(false); continue;
}
}
// We'd better have a sane, sRGB-ish TF by now. // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish; // anything else is just some accident of math and the way we pun tf.g as a type flag. // fit_nonlinear() should guarantee this, but the special cases may fail this test. if (skcms_TFType_sRGBish != classify(tf)) { continue;
}
// We find our error by roundtripping the table through tf_inv. // // (The most likely use case for this approximation is to be inverted and // used as the transfer function for a destination color space.) // // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is // invertible, so re-verify that here (and use the new inverse for testing). // fit_nonlinear() should guarantee this, but the special cases that don't use // it may fail this test. if (!skcms_TransferFunction_invert(&tf, &tf_inv)) { continue;
}
// Call cpuid(7) to check for AVX2 and AVX-512 bits.
__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "0"(7), "2"(0)); // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
uint32_t xcr0, dont_need_edx;
__asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
if ((xcr0 & (1u<<1)) && // XMM register state saved?
(xcr0 & (1u<<2)) && // YMM register state saved?
(ebx & (1u<<5))) { // AVX2 // At this point we're at least HSW. Continue checking for SKX. if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
(xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
(xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
(ebx & (1u<<16)) && // AVX512F
(ebx & (1u<<17)) && // AVX512DQ
(ebx & (1u<<28)) && // AVX512CD
(ebx & (1u<<30)) && // AVX512BW
(ebx & (1u<<31))) { // AVX512VL return CpuType::SKX;
} return CpuType::HSW;
}
} return CpuType::Baseline;
}(); return type; #endif
}
switch (classify(tf)) { case skcms_TFType_Invalid: return noop; case skcms_TFType_sRGBish: return OpAndArg{op.sRGBish, &tf}; case skcms_TFType_PQish: return OpAndArg{op.PQish, &tf}; case skcms_TFType_HLGish: return OpAndArg{op.HLGish, &tf}; case skcms_TFType_HLGinvish: return OpAndArg{op.HLGinvish, &tf};
}
} return OpAndArg{op.table, curve};
}
staticint select_curve_ops(const skcms_Curve* curves, int numChannels, OpAndArg* ops) { // We process the channels in reverse order, yielding ops in ABGR order. // (Working backwards allows us to fuse trailing B+G+R ops into a single RGB op.) int cursor = 0; for (int index = numChannels; index-- > 0; ) {
ops[cursor] = select_curve_op(&curves[index], index); if (ops[cursor].arg) {
++cursor;
}
}
// Identify separate B+G+R ops and fuse them into a single RGB op. if (cursor >= 3) { struct FusableOps {
Op r, g, b, rgb;
}; static constexpr FusableOps kFusableOps[] = {
{Op::gamma_r, Op::gamma_g, Op::gamma_b, Op::gamma_rgb},
{Op::tf_r, Op::tf_g, Op::tf_b, Op::tf_rgb},
{Op::pq_r, Op::pq_g, Op::pq_b, Op::pq_rgb},
{Op::hlg_r, Op::hlg_g, Op::hlg_b, Op::hlg_rgb},
{Op::hlginv_r, Op::hlginv_g, Op::hlginv_b, Op::hlginv_rgb},
};
int posR = cursor - 1; int posG = cursor - 2; int posB = cursor - 3; for (const FusableOps& fusableOp : kFusableOps) { if (ops[posR].op == fusableOp.r &&
ops[posG].op == fusableOp.g &&
ops[posB].op == fusableOp.b &&
(0 == memcmp(ops[posR].arg, ops[posG].arg, sizeof(skcms_TransferFunction))) &&
(0 == memcmp(ops[posR].arg, ops[posB].arg, sizeof(skcms_TransferFunction)))) { // Fuse the three matching ops into one.
ops[posB].op = fusableOp.rgb;
cursor -= 2; break;
}
}
}
return cursor;
}
static size_t bytes_per_pixel(skcms_PixelFormat fmt) { switch (fmt >> 1) { // ignore rgb/bgr case skcms_PixelFormat_A_8 >> 1: return 1; case skcms_PixelFormat_G_8 >> 1: return 1; case skcms_PixelFormat_GA_88 >> 1: return 2; case skcms_PixelFormat_ABGR_4444 >> 1: return 2; case skcms_PixelFormat_RGB_565 >> 1: return 2; case skcms_PixelFormat_RGB_888 >> 1: return 3; case skcms_PixelFormat_RGBA_8888 >> 1: return 4; case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4; case skcms_PixelFormat_RGBA_1010102 >> 1: return 4; case skcms_PixelFormat_RGB_101010x_XR >> 1: return 4; case skcms_PixelFormat_RGB_161616LE >> 1: return 6; case skcms_PixelFormat_RGBA_10101010_XR >> 1: return 8; case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8; case skcms_PixelFormat_RGB_161616BE >> 1: return 6; case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8; case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6; case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8; case skcms_PixelFormat_RGB_hhh >> 1: return 6; case skcms_PixelFormat_RGBA_hhhh >> 1: return 8; case skcms_PixelFormat_RGB_fff >> 1: return 12; case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
}
assert(false); return 0;
}
bool skcms_Transform(constvoid* src,
skcms_PixelFormat srcFmt,
skcms_AlphaFormat srcAlpha, const skcms_ICCProfile* srcProfile, void* dst,
skcms_PixelFormat dstFmt,
skcms_AlphaFormat dstAlpha, const skcms_ICCProfile* dstProfile,
size_t nz) { const size_t dst_bpp = bytes_per_pixel(dstFmt),
src_bpp = bytes_per_pixel(srcFmt); // Let's just refuse if the request is absurdly big. if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) { returnfalse;
} int n = (int)nz;
// Null profiles default to sRGB. Passing null for both is handy when doing format conversion. if (!srcProfile) {
srcProfile = skcms_sRGB_profile();
} if (!dstProfile) {
dstProfile = skcms_sRGB_profile();
}
// We can't transform in place unless the PixelFormats are the same size. if (dst == src && dst_bpp != src_bpp) { returnfalse;
} // TODO: more careful alias rejection (like, dst == src + 1)?
auto add_op_ctx = [&](Op o, constvoid* c) {
*ops++ = o;
*contexts++ = c;
};
auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
OpAndArg oa[4];
assert(numChannels <= ARRAY_COUNT(oa));
int numOps = select_curve_ops(curves, numChannels, oa);
for (int i = 0; i < numOps; ++i) {
add_op_ctx(oa[i].op, oa[i].arg);
}
};
// These are always parametric curves of some sort.
skcms_Curve dst_curves[3];
dst_curves[0].table_entries =
dst_curves[1].table_entries =
dst_curves[2].table_entries = 0;
skcms_Matrix3x3 from_xyz;
switch (srcFmt >> 1) { default: returnfalse; case skcms_PixelFormat_A_8 >> 1: add_op(Op::load_a8); break; case skcms_PixelFormat_G_8 >> 1: add_op(Op::load_g8); break; case skcms_PixelFormat_GA_88 >> 1: add_op(Op::load_ga88); break; case skcms_PixelFormat_ABGR_4444 >> 1: add_op(Op::load_4444); break; case skcms_PixelFormat_RGB_565 >> 1: add_op(Op::load_565); break; case skcms_PixelFormat_RGB_888 >> 1: add_op(Op::load_888); break; case skcms_PixelFormat_RGBA_8888 >> 1: add_op(Op::load_8888); break; case skcms_PixelFormat_RGBA_1010102 >> 1: add_op(Op::load_1010102); break; case skcms_PixelFormat_RGB_101010x_XR >> 1: add_op(Op::load_101010x_XR); break; case skcms_PixelFormat_RGBA_10101010_XR >> 1: add_op(Op::load_10101010_XR); break; case skcms_PixelFormat_RGB_161616LE >> 1: add_op(Op::load_161616LE); break; case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::load_16161616LE); break; case skcms_PixelFormat_RGB_161616BE >> 1: add_op(Op::load_161616BE); break; case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::load_16161616BE); break; case skcms_PixelFormat_RGB_hhh_Norm >> 1: add_op(Op::load_hhh); break; case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: add_op(Op::load_hhhh); break; case skcms_PixelFormat_RGB_hhh >> 1: add_op(Op::load_hhh); break; case skcms_PixelFormat_RGBA_hhhh >> 1: add_op(Op::load_hhhh); break; case skcms_PixelFormat_RGB_fff >> 1: add_op(Op::load_fff); break; case skcms_PixelFormat_RGBA_ffff >> 1: add_op(Op::load_ffff); break;
case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
add_op(Op::load_8888);
add_op_ctx(Op::tf_rgb, skcms_sRGB_TransferFunction()); break;
} if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
add_op(Op::clamp);
} if (srcFmt & 1) {
add_op(Op::swap_rb);
}
skcms_ICCProfile gray_dst_profile; switch (dstFmt >> 1) { case skcms_PixelFormat_G_8: case skcms_PixelFormat_GA_88: // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform // luminance (Y) by the destination transfer function.
gray_dst_profile = *dstProfile;
skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
dstProfile = &gray_dst_profile; break; default: break;
}
if (srcProfile->data_color_space == skcms_Signature_CMYK) { // Photoshop creates CMYK images as inverse CMYK. // These happen to be the only ones we've _ever_ seen.
add_op(Op::invert); // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
srcAlpha = skcms_AlphaFormat_Unpremul;
}
// A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
if (dstProfile->has_B2A) { // B2A needs its input in XYZD50, so transform TRC sources now. if (!srcProfile->has_A2B) {
add_op_ctx(Op::matrix_3x3, &srcProfile->toXYZD50);
}
if (dstProfile->pcs == skcms_Signature_Lab) {
add_op(Op::xyz_to_lab);
}
if (dstProfile->B2A.input_channels == 3) {
add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
}
if (dstProfile->B2A.matrix_channels == 3) { staticconst skcms_Matrix3x4 I = {{
{1,0,0,0},
{0,1,0,0},
{0,0,1,0},
}}; if (0 != memcmp(&I, &dstProfile->B2A.matrix, sizeof(I))) {
add_op_ctx(Op::matrix_3x4, &dstProfile->B2A.matrix);
}
if (dstProfile->B2A.output_channels) {
add_op(Op::clamp);
add_op_ctx(Op::clut_B2A, &dstProfile->B2A);
add_curve_ops(dstProfile->B2A.output_curves,
(int)dstProfile->B2A.output_channels);
}
} else { // This is a TRC destination. // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix. // (A2B sources are already in XYZD50, making that src->xyz matrix I.) staticconst skcms_Matrix3x3 I = {{
{ 1.0f, 0.0f, 0.0f },
{ 0.0f, 1.0f, 0.0f },
{ 0.0f, 0.0f, 1.0f },
}}; const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
// There's a chance the source and destination gamuts are identical, // in which case we can skip the gamut transform. if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) { // Concat the entire gamut transform into from_xyz, // now slightly misnamed but it's a handy spot to stash the result.
from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
add_op_ctx(Op::matrix_3x3, &from_xyz);
}
// Encode back to dst RGB using its parametric transfer functions.
OpAndArg oa[3]; int numOps = select_curve_ops(dst_curves, /*numChannels=*/3, oa); for (int index = 0; index < numOps; ++index) {
assert(oa[index].op != Op::table_r &&
oa[index].op != Op::table_g &&
oa[index].op != Op::table_b &&
oa[index].op != Op::table_a);
add_op_ctx(oa[index].op, oa[index].arg);
}
}
}
// Clamp here before premul to make sure we're clamping to normalized values _and_ gamut, // not just to values that fit in [0,1]. // // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5), // but would be carrying r > 1, which is really unexpected for downstream consumers. if (dstFmt < skcms_PixelFormat_RGB_hhh) {
add_op(Op::clamp);
}
if (dstProfile->data_color_space == skcms_Signature_CMYK) { // Photoshop creates CMYK images as inverse CMYK. // These happen to be the only ones we've _ever_ seen.
add_op(Op::invert);
// CMYK has no alpha channel, so make sure dstAlpha is a no-op.
dstAlpha = skcms_AlphaFormat_Unpremul;
}
if (dstAlpha == skcms_AlphaFormat_Opaque) {
add_op(Op::force_opaque);
} elseif (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
add_op(Op::premul);
} if (dstFmt & 1) {
add_op(Op::swap_rb);
} switch (dstFmt >> 1) { default: returnfalse; case skcms_PixelFormat_A_8 >> 1: add_op(Op::store_a8); break; case skcms_PixelFormat_G_8 >> 1: add_op(Op::store_g8); break; case skcms_PixelFormat_GA_88 >> 1: add_op(Op::store_ga88); break; case skcms_PixelFormat_ABGR_4444 >> 1: add_op(Op::store_4444); break; case skcms_PixelFormat_RGB_565 >> 1: add_op(Op::store_565); break; case skcms_PixelFormat_RGB_888 >> 1: add_op(Op::store_888); break; case skcms_PixelFormat_RGBA_8888 >> 1: add_op(Op::store_8888); break; case skcms_PixelFormat_RGBA_1010102 >> 1: add_op(Op::store_1010102); break; case skcms_PixelFormat_RGB_161616LE >> 1: add_op(Op::store_161616LE); break; case skcms_PixelFormat_RGBA_16161616LE >> 1: add_op(Op::store_16161616LE); break; case skcms_PixelFormat_RGB_161616BE >> 1: add_op(Op::store_161616BE); break; case skcms_PixelFormat_RGBA_16161616BE >> 1: add_op(Op::store_16161616BE); break; case skcms_PixelFormat_RGB_hhh_Norm >> 1: add_op(Op::store_hhh); break; case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: add_op(Op::store_hhhh); break; case skcms_PixelFormat_RGB_101010x_XR >> 1: add_op(Op::store_101010x_XR); break; case skcms_PixelFormat_RGB_hhh >> 1: add_op(Op::store_hhh); break; case skcms_PixelFormat_RGBA_hhhh >> 1: add_op(Op::store_hhhh); break; case skcms_PixelFormat_RGB_fff >> 1: add_op(Op::store_fff); break; case skcms_PixelFormat_RGBA_ffff >> 1: add_op(Op::store_ffff); break;
case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
add_op_ctx(Op::tf_rgb, skcms_sRGB_Inverse_TransferFunction());
add_op(Op::store_8888); break;
}
assert(ops <= program + ARRAY_COUNT(program));
assert(contexts <= context + ARRAY_COUNT(context));
auto run = baseline::run_program; switch (cpu_type()) { case CpuType::SKX: #if !defined(SKCMS_DISABLE_SKX)
run = skx::run_program; break; #endif
case CpuType::HSW: #if !defined(SKCMS_DISABLE_HSW)
run = hsw::run_program; break; #endif
bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) { if (!profile->has_B2A) {
skcms_Matrix3x3 fromXYZD50; if (!profile->has_trc || !profile->has_toXYZD50
|| !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) { returnfalse;
}
skcms_TransferFunction tf[3]; for (int i = 0; i < 3; i++) {
skcms_TransferFunction inv; if (profile->trc[i].table_entries == 0
&& skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
tf[i] = profile->trc[i].parametric; continue;
}
float max_error; // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible. if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) { returnfalse;
}
}
for (int i = 0; i < 3; ++i) {
profile->trc[i].table_entries = 0;
profile->trc[i].parametric = tf[i];
}
}
assert_usable_as_destination(profile); returntrue;
}
bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) { // Call skcms_MakeUsableAsDestination() with B2A disabled; // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
skcms_ICCProfile result = *profile;
result.has_B2A = false; if (!skcms_MakeUsableAsDestination(&result)) { returnfalse;
}
// Of the three, pick the transfer function that best fits the other two. int best_tf = 0; float min_max_error = INFINITY_; for (int i = 0; i < 3; i++) {
skcms_TransferFunction inv; if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) { returnfalse;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.