Support premultiply alpha
diff --git a/src/alpha.c b/src/alpha.c
index 0dbdf80..fa1d551 100644
--- a/src/alpha.c
+++ b/src/alpha.c
@@ -4,6 +4,7 @@
 #include "avif/internal.h"
 
 #include <string.h>
+#include <assert.h>
 
 static int calcMaxChannel(uint32_t depth, avifRange range)
 {
@@ -380,3 +381,248 @@
 
     return AVIF_TRUE;
 }
+
+avifResult avifRGBImagePremultiplyAlpha(avifRGBImage * rgb) {
+    // no data
+    if (!rgb->pixels || !rgb->rowBytes) {
+        return AVIF_RESULT_REFORMAT_FAILED;
+    }
+
+    // no alpha.
+    if (!avifRGBFormatHasAlpha(rgb->format)) {
+        return AVIF_RESULT_INVALID_ARGUMENT;
+    }
+
+    // already premultiplied. No-op.
+    if (rgb->alphaPremultiplied) {
+        return AVIF_RESULT_OK;
+    }
+
+    rgb->alphaPremultiplied = AVIF_TRUE;
+
+    avifResult libyuvResult = avifRGBImagePremultiplyAlphaLibYUV(rgb);
+    if (libyuvResult != AVIF_RESULT_NOT_IMPLEMENTED) {
+        return libyuvResult;
+    }
+
+    assert(rgb->depth >= 8 && rgb->depth <= 16);
+
+    uint32_t max = (1 << rgb->depth) - 1;
+    float maxF = (float)max;
+
+    if (rgb->depth > 8) {
+        if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint16_t * pixel = (uint16_t *)&row[i * 8];
+                    uint16_t a = pixel[3];
+                    if (a >= max) {
+                        // opaque is no-op
+                        continue;
+                    } else if (a == 0) {
+                        // result must be zero
+                        pixel[0] = 0;
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                    } else {
+                        // a < maxF is always true now, so we don't need clamp here
+                        pixel[0] = (uint16_t)avifRoundf((float)pixel[0] * (float)a / maxF);
+                        pixel[1] = (uint16_t)avifRoundf((float)pixel[1] * (float)a / maxF);
+                        pixel[2] = (uint16_t)avifRoundf((float)pixel[2] * (float)a / maxF);
+                    }
+                }
+            }
+        } else {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint16_t * pixel = (uint16_t *)&row[i * 8];
+                    uint16_t a = pixel[0];
+                    if (a >= max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                        pixel[3] = 0;
+                    } else {
+                        pixel[1] = (uint16_t)avifRoundf((float)pixel[1] * (float)a / maxF);
+                        pixel[2] = (uint16_t)avifRoundf((float)pixel[2] * (float)a / maxF);
+                        pixel[3] = (uint16_t)avifRoundf((float)pixel[3] * (float)a / maxF);
+                    }
+                }
+            }
+        }
+    } else {
+        if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint8_t * pixel = &row[i * 4];
+                    uint8_t a = pixel[3];
+                    // uint8_t can't exceed 255
+                    if (a == max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[0] = 0;
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                    } else {
+                        pixel[0] = (uint8_t)avifRoundf((float)pixel[0] * (float)a / maxF);
+                        pixel[1] = (uint8_t)avifRoundf((float)pixel[1] * (float)a / maxF);
+                        pixel[2] = (uint8_t)avifRoundf((float)pixel[2] * (float)a / maxF);
+                    }
+                }
+            }
+        } else {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint8_t * pixel = &row[i * 4];
+                    uint8_t a = pixel[0];
+                    if (a == max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                        pixel[3] = 0;
+                    } else {
+                        pixel[1] = (uint8_t)avifRoundf((float)pixel[1] * (float)a / maxF);
+                        pixel[2] = (uint8_t)avifRoundf((float)pixel[2] * (float)a / maxF);
+                        pixel[3] = (uint8_t)avifRoundf((float)pixel[3] * (float)a / maxF);
+                    }
+                }
+            }
+        }
+    }
+
+    return AVIF_RESULT_OK;
+}
+
+avifResult avifRGBImageUnpremultiplyAlpha(avifRGBImage * rgb)
+{
+    // no data
+    if (!rgb->pixels || !rgb->rowBytes) {
+        return AVIF_RESULT_REFORMAT_FAILED;
+    }
+
+    // no alpha.
+    if (!avifRGBFormatHasAlpha(rgb->format)) {
+        return AVIF_RESULT_REFORMAT_FAILED;
+    }
+
+    // already premultiplied. No-op.
+    if (!rgb->alphaPremultiplied) {
+        return AVIF_RESULT_OK;
+    }
+
+    rgb->alphaPremultiplied = AVIF_FALSE;
+
+    avifResult libyuvResult = avifRGBImageUnpremultiplyAlphaLibYUV(rgb);
+    if (libyuvResult != AVIF_RESULT_NOT_IMPLEMENTED) {
+        return libyuvResult;
+    }
+
+    assert(rgb->depth >= 8 && rgb->depth <= 16);
+
+    uint32_t max = (1 << rgb->depth) - 1;
+    float maxF = (float)max;
+
+    if (rgb->depth > 8) {
+        if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint16_t * pixel = (uint16_t *)&row[i * 8];
+                    uint16_t a = pixel[3];
+                    if (a >= max) {
+                        // opaque is no-op
+                        continue;
+                    } else if (a == 0) {
+                        // prevent divide by zero
+                        pixel[0] = 0;
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                    } else {
+                        float c1 = avifRoundf((float)pixel[0] * maxF / (float)a);
+                        float c2 = avifRoundf((float)pixel[1] * maxF / (float)a);
+                        float c3 = avifRoundf((float)pixel[2] * maxF / (float)a);
+                        pixel[0] = (uint16_t)AVIF_CLAMP(c1, 0, maxF);
+                        pixel[1] = (uint16_t)AVIF_CLAMP(c2, 0, maxF);
+                        pixel[2] = (uint16_t)AVIF_CLAMP(c3, 0, maxF);
+                    }
+                }
+            }
+        } else {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint16_t * pixel = (uint16_t *)&row[i * 8];
+                    uint16_t a = pixel[0];
+                    if (a >= max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                        pixel[3] = 0;
+                    } else {
+                        float c1 = avifRoundf((float)pixel[1] * maxF / (float)a);
+                        float c2 = avifRoundf((float)pixel[2] * maxF / (float)a);
+                        float c3 = avifRoundf((float)pixel[3] * maxF / (float)a);
+                        pixel[1] = (uint16_t)AVIF_CLAMP(c1, 0, maxF);
+                        pixel[2] = (uint16_t)AVIF_CLAMP(c2, 0, maxF);
+                        pixel[3] = (uint16_t)AVIF_CLAMP(c3, 0, maxF);
+                    }
+                }
+            }
+        }
+    } else {
+        if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint8_t * pixel = &row[i * 4];
+                    uint8_t a = pixel[3];
+                    if (a == max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[0] = 0;
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                    } else {
+                        float c1 = avifRoundf((float)pixel[0] * maxF / (float)a);
+                        float c2 = avifRoundf((float)pixel[1] * maxF / (float)a);
+                        float c3 = avifRoundf((float)pixel[2] * maxF / (float)a);
+                        pixel[0] = (uint8_t)AVIF_CLAMP(c1, 0, maxF);
+                        pixel[1] = (uint8_t)AVIF_CLAMP(c2, 0, maxF);
+                        pixel[2] = (uint8_t)AVIF_CLAMP(c3, 0, maxF);
+                    }
+                }
+            }
+        } else {
+            for (uint32_t j = 0; j < rgb->height; ++j) {
+                uint8_t * row = &rgb->pixels[j * rgb->rowBytes];
+                for (uint32_t i = 0; i < rgb->width; ++i) {
+                    uint8_t * pixel = &row[i * 4];
+                    uint8_t a = pixel[0];
+                    if (a == max) {
+                        continue;
+                    } else if (a == 0) {
+                        pixel[1] = 0;
+                        pixel[2] = 0;
+                        pixel[3] = 0;
+                    } else {
+                        float c1 = avifRoundf((float)pixel[1] * maxF / (float)a);
+                        float c2 = avifRoundf((float)pixel[2] * maxF / (float)a);
+                        float c3 = avifRoundf((float)pixel[3] * maxF / (float)a);
+                        pixel[1] = (uint8_t)AVIF_CLAMP(c1, 0, maxF);
+                        pixel[2] = (uint8_t)AVIF_CLAMP(c2, 0, maxF);
+                        pixel[3] = (uint8_t)AVIF_CLAMP(c3, 0, maxF);
+                    }
+                }
+            }
+        }
+    }
+
+    return AVIF_RESULT_OK;
+}
diff --git a/src/avif.c b/src/avif.c
index f4f0fcd..470174d 100644
--- a/src/avif.c
+++ b/src/avif.c
@@ -139,6 +139,7 @@
     dstImage->yuvRange = srcImage->yuvRange;
     dstImage->yuvChromaSamplePosition = srcImage->yuvChromaSamplePosition;
     dstImage->alphaRange = srcImage->alphaRange;
+    dstImage->alphaPremultiplied = srcImage->alphaPremultiplied;
 
     dstImage->colorPrimaries = srcImage->colorPrimaries;
     dstImage->transferCharacteristics = srcImage->transferCharacteristics;
@@ -360,6 +361,7 @@
     rgb->ignoreAlpha = AVIF_FALSE;
     rgb->pixels = NULL;
     rgb->rowBytes = 0;
+    rgb->alphaPremultiplied = image->alphaPremultiplied;
 }
 
 void avifRGBImageAllocatePixels(avifRGBImage * rgb)
diff --git a/src/read.c b/src/read.c
index 5398a4e..941b20d 100644
--- a/src/read.c
+++ b/src/read.c
@@ -140,6 +140,7 @@
     uint32_t auxForID;             // if non-zero, this item is an auxC plane for Item #{auxForID}
     uint32_t descForID;            // if non-zero, this item is a content description for Item #{descForID}
     uint32_t dimgForID;            // if non-zero, this item is a derived image for Item #{dimgForID}
+    uint32_t premByID;             // if non-zero, this item is premultiplied by Item #{premByID}
     avifBool hasUnsupportedEssentialProperty; // If true, this item cites a property flagged as 'essential' that libavif doesn't support (yet). Ignore the item, if so.
     avifBool ipmaSeen; // if true, this item already received a property association
 } avifDecoderItem;
@@ -295,6 +296,7 @@
 {
     uint32_t id;
     uint32_t auxForID; // if non-zero, this item is an auxC plane for Track #{auxForID}
+    uint32_t premByID; // if non-zero, this item is premultiplied by Item #{premByID}
     uint32_t mediaTimescale;
     uint64_t mediaDuration;
     uint32_t width;
@@ -1699,6 +1701,9 @@
 
                     dimg->dimgForID = fromID;
                 }
+                if (!memcmp(irefHeader.type, "prem", 4)) {
+                    item->premByID = toID;
+                }
             }
         }
     }
@@ -2031,6 +2036,10 @@
             CHECK(avifROStreamReadU32(&s, &toID));                       // unsigned int(32) track_IDs[]
             CHECK(avifROStreamSkip(&s, header.size - sizeof(uint32_t))); // just take the first one
             track->auxForID = toID;
+        } else if (!memcmp(header.type, "prem", 4)) {
+            uint32_t byID;
+            CHECK(avifROStreamReadU32(&s, &byID));                       // unsigned int(32) to_item_ID
+            track->premByID = byID;
         } else {
             CHECK(avifROStreamSkip(&s, header.size));
         }
@@ -2604,6 +2613,7 @@
         decoder->image->width = colorTrack->width;
         decoder->image->height = colorTrack->height;
         decoder->alphaPresent = (alphaTrack != NULL);
+        decoder->image->alphaPremultiplied = decoder->alphaPresent && colorTrack->premByID == alphaTrack->id;
     } else {
         // Create from items
 
@@ -2760,6 +2770,7 @@
             decoder->image->height = 0;
         }
         decoder->alphaPresent = (alphaItem != NULL);
+        decoder->image->alphaPremultiplied = decoder->alphaPresent && colorItem->premByID == alphaItem->id;
     }
 
     // Sanity check tiles
diff --git a/src/reformat.c b/src/reformat.c
index 57c9d7e..4d2a863 100644
--- a/src/reformat.c
+++ b/src/reformat.c
@@ -159,6 +159,10 @@
         return AVIF_RESULT_REFORMAT_FAILED;
     }
 
+    if (image->alphaPremultiplied != rgb->alphaPremultiplied) {
+        return AVIF_RESULT_REFORMAT_FAILED;
+    }
+
     avifReformatState state;
     if (!avifPrepareReformatState(image, rgb, &state)) {
         return AVIF_RESULT_REFORMAT_FAILED;
@@ -965,6 +969,10 @@
         return AVIF_RESULT_REFORMAT_FAILED;
     }
 
+    if (image->alphaPremultiplied != rgb->alphaPremultiplied) {
+        return AVIF_RESULT_REFORMAT_FAILED;
+    }
+
     avifReformatState state;
     if (!avifPrepareReformatState(image, rgb, &state)) {
         return AVIF_RESULT_REFORMAT_FAILED;
diff --git a/src/reformat_libyuv.c b/src/reformat_libyuv.c
index 273e7ad..061a579 100644
--- a/src/reformat_libyuv.c
+++ b/src/reformat_libyuv.c
@@ -12,6 +12,16 @@
     (void)rgb;
     return AVIF_RESULT_NOT_IMPLEMENTED;
 }
+avifResult avifRGBImagePremultiplyLibYUV(avifRGBImage * rgb)
+{
+    (void)rgb;
+    return AVIF_RESULT_NOT_IMPLEMENTED;
+}
+avifResult avifRGBImageUnpremultiplyLibYUV(avifRGBImage * rgb)
+{
+    (void)rgb;
+    return AVIF_RESULT_NOT_IMPLEMENTED;
+}
 unsigned int avifLibYUVVersion(void)
 {
     return 0;
@@ -422,6 +432,49 @@
     return AVIF_RESULT_NOT_IMPLEMENTED;
 }
 
+avifResult avifRGBImagePremultiplyAlphaLibYUV(avifRGBImage * rgb)
+{
+    // See if the current settings can be accomplished with libyuv, and use it (if possible).
+
+    if (rgb->depth != 8) {
+        return AVIF_RESULT_NOT_IMPLEMENTED;
+    }
+
+    // libavif uses byte-order when describing pixel formats, such that the R in RGBA is the lowest address,
+    // similar to PNG. libyuv orders in word-order, so libavif's RGBA would be referred to in libyuv as ABGR.
+
+    // order of RGB doesn't matter here.
+    if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+        if (ARGBAttenuate(rgb->pixels, rgb->rowBytes, rgb->pixels, rgb->rowBytes, rgb->width, rgb->height) != 0) {
+            return AVIF_RESULT_REFORMAT_FAILED;
+        }
+        return AVIF_RESULT_OK;
+    }
+
+    return AVIF_RESULT_NOT_IMPLEMENTED;
+}
+
+avifResult avifRGBImageUnpremultiplyAlphaLibYUV(avifRGBImage * rgb)
+{
+    // See if the current settings can be accomplished with libyuv, and use it (if possible).
+
+    if (rgb->depth != 8) {
+        return AVIF_RESULT_NOT_IMPLEMENTED;
+    }
+
+    // libavif uses byte-order when describing pixel formats, such that the R in RGBA is the lowest address,
+    // similar to PNG. libyuv orders in word-order, so libavif's RGBA would be referred to in libyuv as ABGR.
+
+    if (rgb->format == AVIF_RGB_FORMAT_RGBA || rgb->format == AVIF_RGB_FORMAT_BGRA) {
+        if (ARGBUnattenuate(rgb->pixels, rgb->rowBytes, rgb->pixels, rgb->rowBytes, rgb->width, rgb->height) != 0) {
+            return AVIF_RESULT_REFORMAT_FAILED;
+        }
+        return AVIF_RESULT_OK;
+    }
+
+    return AVIF_RESULT_NOT_IMPLEMENTED;
+}
+
 unsigned int avifLibYUVVersion(void)
 {
     return (unsigned int)LIBYUV_VERSION;
diff --git a/src/write.c b/src/write.c
index bb8e546..700c5f9 100644
--- a/src/write.c
+++ b/src/write.c
@@ -417,7 +417,8 @@
     for (uint32_t cellIndex = 0; cellIndex < cellCount; ++cellIndex) {
         const avifImage * cellImage = cellImages[cellIndex];
         if ((cellImage->depth != firstCell->depth) || (cellImage->width != firstCell->width) ||
-            (cellImage->height != firstCell->height) || (!!cellImage->alphaPlane != !!firstCell->alphaPlane)) {
+            (cellImage->height != firstCell->height) || (!!cellImage->alphaPlane != !!firstCell->alphaPlane) ||
+            (cellImage->alphaPremultiplied != firstCell->alphaPremultiplied)) {
             return AVIF_RESULT_INVALID_IMAGE_GRID;
         }
 
@@ -460,6 +461,9 @@
 
         // Prepare all AV1 items
 
+        const char ** pColorIrefType;
+        uint16_t * pColorIrefToID;
+
         uint16_t gridColorID = 0;
         if (cellCount > 1) {
             avifEncoderItem * gridColorItem = avifEncoderDataCreateItem(encoder->data, "grid", "Color", 6, 0);
@@ -469,6 +473,8 @@
 
             gridColorID = gridColorItem->id;
             encoder->data->primaryItemID = gridColorID;
+            pColorIrefType = &gridColorItem->irefType;
+            pColorIrefToID = &gridColorItem->irefToID;
         }
 
         for (uint32_t cellIndex = 0; cellIndex < cellCount; ++cellIndex) {
@@ -484,6 +490,8 @@
                 item->dimgFromID = gridColorID;
             } else {
                 encoder->data->primaryItemID = item->id;
+                pColorIrefType = &item->irefType;
+                pColorIrefToID = &item->irefToID;
             }
         }
 
@@ -519,6 +527,11 @@
                 gridAlphaItem->gridCols = gridCols;
                 gridAlphaItem->gridRows = gridRows;
                 gridAlphaID = gridAlphaItem->id;
+
+                if (encoder->data->imageMetadata->alphaPremultiplied) {
+                    *pColorIrefType = "prem";
+                    *pColorIrefToID = gridAlphaID;
+                }
             }
 
             for (uint32_t cellIndex = 0; cellIndex < cellCount; ++cellIndex) {
@@ -535,6 +548,11 @@
                 } else {
                     item->irefToID = encoder->data->primaryItemID;
                     item->irefType = "auxl";
+
+                    if (encoder->data->imageMetadata->alphaPremultiplied) {
+                        *pColorIrefType = "prem";
+                        *pColorIrefToID = item->id;
+                    }
                 }
             }
         }