Make alpha premultiply a step in YUV<->RGB conversion
diff --git a/apps/shared/avifjpeg.c b/apps/shared/avifjpeg.c
index ded4cdb..eec32d3 100644
--- a/apps/shared/avifjpeg.c
+++ b/apps/shared/avifjpeg.c
@@ -114,6 +114,7 @@
     return ret;
 }
 
+#if !defined(JCS_ALPHA_EXTENSIONS)
 // this is only for removing alpha when processing non-premultiplied image.
 static void avifRGBAToRGB(const avifRGBImage * src, avifRGBImage * dst) {
     dst->width = src->width;
@@ -133,6 +134,7 @@
         }
     }
 }
+#endif
 
 avifBool avifJPEGWrite(avifImage * avif, const char * outputFilename, int jpegQuality, avifChromaUpsampling chromaUpsampling)
 {
@@ -152,12 +154,17 @@
     rgb.format = avif->alphaPremultiplied ? AVIF_RGB_FORMAT_RGB : AVIF_RGB_FORMAT_RGBA;
     rgb.chromaUpsampling = chromaUpsampling;
     rgb.depth = 8;
+    // Always request a premultiplied result from the YUV->RGB conversion.
+    // The alpha channel is dropped on output, and premultiplied color gives the JPEG a natural appearance.
+    rgb.alphaPremultiplied = AVIF_TRUE;
     avifRGBImageAllocatePixels(&rgb);
     if (avifImageYUVToRGB(avif, &rgb) != AVIF_RESULT_OK) {
         fprintf(stderr, "Conversion to RGB failed: %s\n", outputFilename);
         goto cleanup;
     }
 
+    // libjpeg-turbo (JCS_ALPHA_EXTENSIONS) can take RGBX rows directly, so skip the manual alpha-stripping step when it is available
+#if !defined(JCS_ALPHA_EXTENSIONS)
     if (!avif->alphaPremultiplied) {
         if (avifRGBImagePremultiplyAlpha(&rgb) != AVIF_RESULT_OK) {
             fprintf(stderr, "Conversion to RGB failed: %s\n", outputFilename);
@@ -166,6 +173,7 @@
         avifRGBAToRGB(&rgb, &rgbPremultiplied);
         avifRGBImageFreePixels(&rgb);
     }
+#endif
 
     f = fopen(outputFilename, "wb");
     if (!f) {
@@ -176,8 +184,13 @@
     jpeg_stdio_dest(&cinfo, f);
     cinfo.image_width = avif->width;
     cinfo.image_height = avif->height;
+#if defined(JCS_ALPHA_EXTENSIONS)
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_EXT_RGBX;
+#else
     cinfo.input_components = 3;
     cinfo.in_color_space = JCS_RGB;
+#endif
     jpeg_set_defaults(&cinfo);
     jpeg_set_quality(&cinfo, jpegQuality, TRUE);
     jpeg_start_compress(&cinfo, TRUE);
@@ -186,6 +199,12 @@
         write_icc_profile(&cinfo, avif->icc.data, (unsigned int)avif->icc.size);
     }
 
+#if defined(JCS_ALPHA_EXTENSIONS)
+    while (cinfo.next_scanline < cinfo.image_height) {
+        row_pointer[0] = &rgb.pixels[cinfo.next_scanline * rgb.rowBytes];
+        (void)jpeg_write_scanlines(&cinfo, row_pointer, 1);
+    }
+#else
     if (avif->alphaPremultiplied) {
         while (cinfo.next_scanline < cinfo.image_height) {
             row_pointer[0] = &rgb.pixels[cinfo.next_scanline * rgb.rowBytes];
@@ -197,6 +216,7 @@
             (void)jpeg_write_scanlines(&cinfo, row_pointer, 1);
         }
     }
+#endif
 
     jpeg_finish_compress(&cinfo);
     ret = AVIF_TRUE;
diff --git a/apps/shared/avifpng.c b/apps/shared/avifpng.c
index 5e3897e..276f09d 100644
--- a/apps/shared/avifpng.c
+++ b/apps/shared/avifpng.c
@@ -136,9 +136,6 @@
         rowPointers[y] = &rgb.pixels[y * rgb.rowBytes];
     }
     png_read_image(png, rowPointers);
-    if (avif->alphaPremultiplied) {
-        avifRGBImagePremultiplyAlpha(&rgb);
-    }
     if (avifImageRGBToYUV(avif, &rgb) != AVIF_RESULT_OK) {
         fprintf(stderr, "Conversion to YUV failed: %s\n", inputFilename);
         goto cleanup;
@@ -182,14 +179,12 @@
     avifRGBImageSetDefaults(&rgb, avif);
     rgb.depth = rgbDepth;
     rgb.chromaUpsampling = chromaUpsampling;
+    rgb.alphaPremultiplied = AVIF_FALSE;
     avifRGBImageAllocatePixels(&rgb);
     if (avifImageYUVToRGB(avif, &rgb) != AVIF_RESULT_OK) {
         fprintf(stderr, "Conversion to RGB failed: %s\n", outputFilename);
         goto cleanup;
     }
-    if (rgb.alphaPremultiplied) {
-        avifRGBImageUnpremultiplyAlpha(&rgb);
-    }
 
     f = fopen(outputFilename, "wb");
     if (!f) {
diff --git a/examples/avif_example_decode_file.c b/examples/avif_example_decode_file.c
index 4ac9167..15102df 100644
--- a/examples/avif_example_decode_file.c
+++ b/examples/avif_example_decode_file.c
@@ -53,7 +53,7 @@
         // * this frame's sequence timing
 
         avifRGBImageSetDefaults(&rgb, decoder->image);
-        // Override YUV(A)->RGB(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, libYUVUsage, etc
+        // Override YUV(A)->RGB(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, alphaPremultiplied, libYUVUsage, etc
 
         // Alternative: set rgb.pixels and rgb.rowBytes yourself, which should match your chosen rgb.format
         // Be sure to use uint16_t* instead of uint8_t* for rgb.pixels/rgb.rowBytes if (rgb.depth > 8)
@@ -66,9 +66,6 @@
 
         // Now available:
         // * RGB(A) pixel data (rgb.pixels, rgb.rowBytes)
-        //   note that if alpha is present, RGB may or may not be premultiplied by alpha.
-        //   call avifRGBImagePremultiplyAlpha() or avifRGBImageUnpremultiplyAlpha()
-        //   to convert pixel data into your desired format.
 
         if (rgb.depth > 8) {
             uint16_t * firstPixel = (uint16_t *)rgb.pixels;
diff --git a/examples/avif_example_decode_memory.c b/examples/avif_example_decode_memory.c
index 5804c14..f4c62a1 100644
--- a/examples/avif_example_decode_memory.c
+++ b/examples/avif_example_decode_memory.c
@@ -75,7 +75,7 @@
         // * this frame's sequence timing
 
         avifRGBImageSetDefaults(&rgb, decoder->image);
-        // Override YUV(A)->RGB(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, libYUVUsage, etc
+        // Override YUV(A)->RGB(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, alphaPremultiplied, libYUVUsage, etc
 
         // Alternative: set rgb.pixels and rgb.rowBytes yourself, which should match your chosen rgb.format
         // Be sure to use uint16_t* instead of uint8_t* for rgb.pixels/rgb.rowBytes if (rgb.depth > 8)
@@ -88,9 +88,6 @@
 
         // Now available:
         // * RGB(A) pixel data (rgb.pixels, rgb.rowBytes)
-        //   note that if alpha is present, RGB may or may not be premultiplied by alpha.
-        //   call avifRGBImagePremultiplyAlpha() or avifRGBImageUnpremultiplyAlpha()
-        //   to convert pixel data into your desired format.
 
         if (rgb.depth > 8) {
             uint16_t * firstPixel = (uint16_t *)rgb.pixels;
diff --git a/examples/avif_example_encode.c b/examples/avif_example_encode.c
index b97ff1f..7bcc01c 100644
--- a/examples/avif_example_encode.c
+++ b/examples/avif_example_encode.c
@@ -54,7 +54,7 @@
         printf("Encoding from converted RGBA\n");
 
         avifRGBImageSetDefaults(&rgb, image);
-        // Override RGB(A)->YUV(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, libYUVUsage, etc
+        // Override RGB(A)->YUV(A) defaults here: depth, format, chromaUpsampling, ignoreAlpha, alphaPremultiplied, libYUVUsage, etc
 
         // Alternative: set rgb.pixels and rgb.rowBytes yourself, which should match your chosen rgb.format
         // Be sure to use uint16_t* instead of uint8_t* for rgb.pixels/rgb.rowBytes if (rgb.depth > 8)
@@ -63,11 +63,6 @@
         // Fill your RGB(A) data here
         memset(rgb.pixels, 255, rgb.rowBytes * image->height);
 
-        // If your data is not premultiplied but you want to encode avif as
-        // premultiplied, set rgb.alphaPremultiplied to false, then call
-        // avifRGBImagePremultiplyAlpha() to convert your RGBA data
-        // into premultiplied format.
-
         avifResult convertResult = avifImageRGBToYUV(image, &rgb);
         if (convertResult != AVIF_RESULT_OK) {
             fprintf(stderr, "Failed to convert to YUV(A): %s\n", avifResultToString(convertResult));
diff --git a/include/avif/avif.h b/include/avif/avif.h
index 7c26a7b..c13fa74 100644
--- a/include/avif/avif.h
+++ b/include/avif/avif.h
@@ -473,9 +473,7 @@
                                            // Unused when converting to YUV. avifRGBImageSetDefaults() prefers quality over speed.
     avifBool ignoreAlpha; // Used for XRGB formats, treats formats containing alpha (such as ARGB) as if they were
                           // RGB, treating the alpha bits as if they were all 1.
-    avifBool alphaPremultiplied; // indicates if RGB value has been pre-multiplied by alpha
-                                 // this should always indicate the real state of RGB data
-                                 // To convert, use avifRGBImagePremultiplyAlpha() or avifRGBImageUnpremultiplyAlpha().
+    avifBool alphaPremultiplied; // indicates if RGB value is pre-multiplied by alpha
 
     uint8_t * pixels;
     uint32_t rowBytes;
@@ -493,6 +491,8 @@
 AVIF_API avifResult avifImageYUVToRGB(const avifImage * image, avifRGBImage * rgb);
 
 // Premultiply handling functions.
+// (Un)premultiplication is performed automatically by the main conversion functions above,
+// so you usually do not need to call these directly. They are provided for convenience.
 AVIF_API avifResult avifRGBImagePremultiplyAlpha(avifRGBImage * rgb);
 AVIF_API avifResult avifRGBImageUnpremultiplyAlpha(avifRGBImage * rgb);
 
diff --git a/src/reformat.c b/src/reformat.c
index 4d2a863..047b8c1 100644
--- a/src/reformat.c
+++ b/src/reformat.c
@@ -159,10 +159,6 @@
         return AVIF_RESULT_REFORMAT_FAILED;
     }
 
-    if (image->alphaPremultiplied != rgb->alphaPremultiplied) {
-        return AVIF_RESULT_REFORMAT_FAILED;
-    }
-
     avifReformatState state;
     if (!avifPrepareReformatState(image, rgb, &state)) {
         return AVIF_RESULT_REFORMAT_FAILED;
@@ -215,6 +211,44 @@
                         rgbPixel[2] = rgb->pixels[state.rgbOffsetBytesB + (i * state.rgbPixelBytes) + (j * rgb->rowBytes)] / rgbMaxChannelF;
                     }
 
+                    if (avifRGBFormatHasAlpha(rgb->format) && !rgb->ignoreAlpha) {
+                        float a;
+                        if (state.rgbChannelBytes > 1) {
+                            a = *((uint16_t *)(&rgb->pixels[state.rgbOffsetBytesA + (i * state.rgbPixelBytes) + (j * rgb->rowBytes)])) /
+                                rgbMaxChannelF;
+                        } else {
+                            a = rgb->pixels[state.rgbOffsetBytesA + (i * state.rgbPixelBytes) + (j * rgb->rowBytes)] / rgbMaxChannelF;
+                        }
+
+                        if (!rgb->alphaPremultiplied && image->alphaPremultiplied) {
+                            // premultiply: RGB input is not premultiplied, but the YUV image is flagged as premultiplied
+                            if (a == 0) {
+                                rgbPixel[0] = 0;
+                                rgbPixel[1] = 0;
+                                rgbPixel[2] = 0;
+                            } else if (a < 1.0f) {
+                                rgbPixel[0] = rgbPixel[0] * a;
+                                rgbPixel[1] = rgbPixel[1] * a;
+                                rgbPixel[2] = rgbPixel[2] * a;
+                            }
+
+                        } else if (rgb->alphaPremultiplied && !image->alphaPremultiplied) {
+                            // unpremultiply: RGB input is premultiplied, but the YUV image is flagged as not premultiplied
+                            if (a == 0) {
+                                rgbPixel[0] = 0;
+                                rgbPixel[1] = 0;
+                                rgbPixel[2] = 0;
+                            } else if (a < 1.0f) {
+                                float r = rgbPixel[0] / a;
+                                float g = rgbPixel[1] / a;
+                                float b = rgbPixel[2] / a;
+                                rgbPixel[0] = AVIF_CLAMP(r, 0.0f, 1.0f);
+                                rgbPixel[1] = AVIF_CLAMP(g, 0.0f, 1.0f);
+                                rgbPixel[2] = AVIF_CLAMP(b, 0.0f, 1.0f);
+                            }
+                        }
+                    }
+
                     // RGB -> YUV conversion
                     if (state.mode == AVIF_REFORMAT_MODE_IDENTITY) {
                         // Formulas 41,42,43 from https://www.itu.int/rec/T-REC-H.273-201612-I/en
@@ -969,10 +1003,6 @@
         return AVIF_RESULT_REFORMAT_FAILED;
     }
 
-    if (image->alphaPremultiplied != rgb->alphaPremultiplied) {
-        return AVIF_RESULT_REFORMAT_FAILED;
-    }
-
     avifReformatState state;
     if (!avifPrepareReformatState(image, rgb, &state)) {
         return AVIF_RESULT_REFORMAT_FAILED;
@@ -1039,6 +1069,8 @@
     const avifBool hasColor =
         (image->yuvRowBytes[AVIF_CHAN_U] && image->yuvRowBytes[AVIF_CHAN_V] && (image->yuvFormat != AVIF_PIXEL_FORMAT_YUV400));
 
+    avifResult convertResult = AVIF_RESULT_NOT_IMPLEMENTED;
+
     if (!hasColor || (image->yuvFormat == AVIF_PIXEL_FORMAT_YUV444) || (chromaUpsampling == AVIF_CHROMA_UPSAMPLING_NEAREST)) {
         // None of these fast paths currently support bilinear upsampling, so avoid all of them
         // unless the YUV data isn't subsampled or they explicitly requested AVIF_CHROMA_UPSAMPLING_NEAREST.
@@ -1046,7 +1078,7 @@
         if (state.mode == AVIF_REFORMAT_MODE_IDENTITY) {
             if ((image->depth == 8) && (rgb->depth == 8) && (image->yuvFormat == AVIF_PIXEL_FORMAT_YUV444) &&
                 (image->yuvRange == AVIF_RANGE_FULL)) {
-                return avifImageIdentity8ToRGB8ColorFullRange(image, rgb, &state);
+                convertResult = avifImageIdentity8ToRGB8ColorFullRange(image, rgb, &state);
             }
 
             // TODO: Add more fast paths for identity
@@ -1058,16 +1090,18 @@
                     // yuv:u16, rgb:u16
 
                     if (hasColor) {
-                        return avifImageYUV16ToRGB16Color(image, rgb, &state);
+                        convertResult = avifImageYUV16ToRGB16Color(image, rgb, &state);
+                    } else {
+                        convertResult = avifImageYUV16ToRGB16Mono(image, rgb, &state);
                     }
-                    return avifImageYUV16ToRGB16Mono(image, rgb, &state);
                 } else {
                     // yuv:u16, rgb:u8
 
                     if (hasColor) {
-                        return avifImageYUV16ToRGB8Color(image, rgb, &state);
+                        convertResult = avifImageYUV16ToRGB8Color(image, rgb, &state);
+                    } else {
+                        convertResult = avifImageYUV16ToRGB8Mono(image, rgb, &state);
                     }
-                    return avifImageYUV16ToRGB8Mono(image, rgb, &state);
                 }
             } else {
                 // yuv:u8
@@ -1076,23 +1110,41 @@
                     // yuv:u8, rgb:u16
 
                     if (hasColor) {
-                        return avifImageYUV8ToRGB16Color(image, rgb, &state);
+                        convertResult = avifImageYUV8ToRGB16Color(image, rgb, &state);
+                    } else {
+                        convertResult = avifImageYUV8ToRGB16Mono(image, rgb, &state);
                     }
-                    return avifImageYUV8ToRGB16Mono(image, rgb, &state);
                 } else {
                     // yuv:u8, rgb:u8
 
                     if (hasColor) {
-                        return avifImageYUV8ToRGB8Color(image, rgb, &state);
+                        convertResult = avifImageYUV8ToRGB8Color(image, rgb, &state);
+                    } else {
+                        convertResult = avifImageYUV8ToRGB8Mono(image, rgb, &state);
                     }
-                    return avifImageYUV8ToRGB8Mono(image, rgb, &state);
                 }
             }
         }
     }
 
-    // If we get here, there is no fast path for this combination. Time to be slow!
-    return avifImageYUVAnyToRGBAnySlow(image, rgb, &state, chromaUpsampling);
+    if (convertResult == AVIF_RESULT_NOT_IMPLEMENTED) {
+        // If we get here, there is no fast path for this combination. Time to be slow!
+        convertResult = avifImageYUVAnyToRGBAnySlow(image, rgb, &state, chromaUpsampling);
+    }
+
+    if (convertResult != AVIF_RESULT_OK) {
+        return convertResult;
+    }
+
+    if (avifRGBFormatHasAlpha(rgb->format) && !rgb->ignoreAlpha) {
+        if (image->alphaPremultiplied && !rgb->alphaPremultiplied) {
+            return avifRGBImageUnpremultiplyAlpha(rgb);
+        } else if (!image->alphaPremultiplied && rgb->alphaPremultiplied) {
+            return avifRGBImagePremultiplyAlpha(rgb);
+        }
+    }
+
+    return convertResult;
 }
 
 // Limited -> Full