Read extended XMP in avifjpeg.c
Add unit tests and test images.
diff --git a/apps/shared/avifjpeg.c b/apps/shared/avifjpeg.c
index 29d915e..8e6356e 100644
--- a/apps/shared/avifjpeg.c
+++ b/apps/shared/avifjpeg.c
@@ -6,6 +6,7 @@
#include <assert.h>
#include <setjmp.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -225,6 +226,34 @@
return AVIF_FALSE;
}
+// Decodes the four bytes starting at src as a single unsigned 32-bit integer, most significant byte first.
+static uint32_t avifJPEGReadUint32BigEndian(const uint8_t * src)
+{
+    uint32_t value = 0;
+    // Shift the accumulated value up by one byte and append the next byte, four times.
+    for (int byteIndex = 0; byteIndex < 4; ++byteIndex) {
+        value = (value << 8) | src[byteIndex];
+    }
+    return value;
+}
+
+// Searches the strLength bytes at str for the first position at which the substrLength bytes at substr occur.
+// Returns a pointer to that position inside str, or NULL if there is no such position.
+static const uint8_t * avifJPEGFindSubstr(const uint8_t * str, size_t strLength, const uint8_t * substr, size_t substrLength)
+{
+    // A pattern longer than the searched buffer can never match.
+    if (substrLength > strLength) {
+        return NULL;
+    }
+    // Last position at which the whole pattern still fits inside str.
+    const size_t lastStart = strLength - substrLength;
+    for (size_t start = 0; start <= lastStart; ++start) {
+        if (memcmp(str + start, substr, substrLength) == 0) {
+            return str + start;
+        }
+    }
+    return NULL;
+}
+
+// Number of characters in an Extended XMP GUID (stored as an ASCII hex string without null termination).
+#define AVIF_EXTENDED_XMP_GUID_LENGTH 32
+// One way of storing the Extended XMP GUID (generated by a camera for example).
+#define AVIF_XMP_NOTE_TAG "xmpNote:HasExtendedXMP=\""
+// Length of the tag without its null terminator. Derived from the literal so the two cannot drift apart.
+#define AVIF_XMP_NOTE_TAG_LENGTH (sizeof(AVIF_XMP_NOTE_TAG) - 1)
+// Another way of storing the Extended XMP GUID (generated by exiftool for example).
+#define AVIF_ALTERNATIVE_XMP_NOTE_TAG "<xmpNote:HasExtendedXMP>"
+// Length of the alternative tag without its null terminator, also derived from the literal.
+#define AVIF_ALTERNATIVE_XMP_NOTE_TAG_LENGTH (sizeof(AVIF_ALTERNATIVE_XMP_NOTE_TAG) - 1)
+
+// Offset in APP1 segment (skip tag + guid + size + offset).
+// This expands to an expression that reads a variable named tagExtendedXMP,
+// so such a variable must be in scope wherever the macro is used.
+#define AVIF_OFFSET_TILL_EXTENDED_XMP (tagExtendedXMP.size + AVIF_EXTENDED_XMP_GUID_LENGTH + 4 + 4)
+
// Note on setjmp() and volatile variables:
//
// K & R, The C Programming Language 2nd Ed, p. 254 says:
@@ -254,6 +283,11 @@
avifRGBImage rgb;
memset(&rgb, 0, sizeof(avifRGBImage));
+ // Standard XMP segment followed by all extended XMP segments.
+ avifRWData totalXMP = { NULL, 0 };
+ // Each byte set to 0 is a missing byte. Each byte set to 1 was read and copied to totalXMP.
+ avifRWData extendedXMPReadBytes = { NULL, 0 };
+
FILE * f = fopen(inputFilename, "rb");
if (!f) {
fprintf(stderr, "Can't open JPEG file for read: %s\n", inputFilename);
@@ -343,7 +377,7 @@
!memcmp(marker->data, tagExif.data, tagExif.size)) {
if (found) {
// TODO(yguyon): Implement instead of outputting an error.
- fprintf(stderr, "Exif extraction failed: unsupported Exif split into multiple chunks or invalid multiple Exif chunks\n");
+ fprintf(stderr, "Exif extraction failed: unsupported Exif split into multiple segments or invalid multiple Exif segments\n");
goto cleanup;
}
avifImageSetMetadataExif(avif, marker->data + tagExif.size, marker->data_length - tagExif.size);
@@ -354,24 +388,134 @@
}
}
if (!ignoreXMP) {
- const avifROData tagStandardXmp = { (const uint8_t *)"http://ns.adobe.com/xap/1.0/\0", 29 };
- const avifROData tagExtendedXmp = { (const uint8_t *)"http://ns.adobe.com/xmp/extension/\0", 35 };
- avifBool found = AVIF_FALSE;
+ const avifROData tagStandardXMP = { (const uint8_t *)"http://ns.adobe.com/xap/1.0/\0", 29 };
+ const uint8_t * standardXMPData = NULL;
+ uint32_t standardXMPSize = 0; // At most 64kB as defined by Adobe XMP Specification Part 3.
for (jpeg_saved_marker_ptr marker = cinfo.marker_list; marker != NULL; marker = marker->next) {
- if ((marker->marker == (JPEG_APP0 + 1)) && (marker->data_length > tagStandardXmp.size) &&
- !memcmp(marker->data, tagStandardXmp.data, tagStandardXmp.size)) {
- if (found) {
- fprintf(stderr, "XMP extraction failed: invalid multiple XMP chunks\n");
+ if ((marker->marker == (JPEG_APP0 + 1)) && (marker->data_length > tagStandardXMP.size) &&
+ !memcmp(marker->data, tagStandardXMP.data, tagStandardXMP.size)) {
+ if (standardXMPData) {
+ fprintf(stderr, "XMP extraction failed: invalid multiple standard XMP segments\n");
goto cleanup;
}
- avifImageSetMetadataXMP(avif, marker->data + tagStandardXmp.size, marker->data_length - tagStandardXmp.size);
- found = AVIF_TRUE;
- } else if ((marker->marker == (JPEG_APP0 + 1)) && (marker->data_length > tagExtendedXmp.size) &&
- !memcmp(marker->data, tagExtendedXmp.data, tagExtendedXmp.size)) {
- // TODO(yguyon): Implement instead of outputting an error.
- fprintf(stderr, "XMP extraction failed: extended XMP is unsupported\n");
+ standardXMPData = marker->data + tagStandardXMP.size;
+ standardXMPSize = (uint32_t)(marker->data_length - tagStandardXMP.size);
+ }
+ }
+
+ const avifROData tagExtendedXMP = { (const uint8_t *)"http://ns.adobe.com/xmp/extension/\0", 35 };
+ avifBool foundExtendedXMP = AVIF_FALSE;
+ uint8_t extendedXMPGUID[AVIF_EXTENDED_XMP_GUID_LENGTH]; // The value is common to all extended XMP segments.
+ for (jpeg_saved_marker_ptr marker = cinfo.marker_list; marker != NULL; marker = marker->next) {
+ if ((marker->marker == (JPEG_APP0 + 1)) && (marker->data_length > tagExtendedXMP.size) &&
+ !memcmp(marker->data, tagExtendedXMP.data, tagExtendedXMP.size)) {
+ if (!standardXMPData) {
+ fprintf(stderr, "XMP extraction failed: extended XMP segment found, missing standard XMP segment\n");
+ goto cleanup;
+ }
+
+ if (marker->data_length < AVIF_OFFSET_TILL_EXTENDED_XMP) {
+ fprintf(stderr, "XMP extraction failed: truncated extended XMP segment\n");
+ goto cleanup;
+ }
+ const uint8_t * guid = &marker->data[tagExtendedXMP.size];
+ for (size_t c = 0; c < AVIF_EXTENDED_XMP_GUID_LENGTH; ++c) {
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "128-bit GUID stored as a 32-byte ASCII hex string, capital A-F, no null termination"
+ if (((guid[c] < '0') || (guid[c] > '9')) && ((guid[c] < 'A') || (guid[c] > 'F'))) {
+ fprintf(stderr, "XMP extraction failed: invalid XMP segment GUID\n");
+ goto cleanup;
+ }
+ }
+ // Size of the current extended segment.
+ const size_t extendedXMPSize = marker->data_length - AVIF_OFFSET_TILL_EXTENDED_XMP;
+ // Expected size of the sum of all extended segments.
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "full length of the ExtendedXMP serialization as a 32-bit unsigned integer"
+ const uint32_t totalExtendedXMPSize =
+ avifJPEGReadUint32BigEndian(&marker->data[tagExtendedXMP.size + AVIF_EXTENDED_XMP_GUID_LENGTH]);
+ // Offset in totalXMP after standardXMP.
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "offset of this portion as a 32-bit unsigned integer"
+ const uint32_t extendedXMPOffset =
+ avifJPEGReadUint32BigEndian(&marker->data[tagExtendedXMP.size + AVIF_EXTENDED_XMP_GUID_LENGTH + 4]);
+ if (((uint64_t)standardXMPSize + totalExtendedXMPSize) > SIZE_MAX) {
+ fprintf(stderr, "XMP extraction failed: total XMP size is too large\n");
+ goto cleanup;
+ }
+ if ((extendedXMPSize == 0) || (((uint64_t)extendedXMPOffset + extendedXMPSize) > totalExtendedXMPSize)) {
+ fprintf(stderr, "XMP extraction failed: invalid extended XMP segment size or offset\n");
+ goto cleanup;
+ }
+ if (foundExtendedXMP) {
+ if (memcmp(guid, extendedXMPGUID, AVIF_EXTENDED_XMP_GUID_LENGTH)) {
+ fprintf(stderr, "XMP extraction failed: extended XMP segment GUID mismatch\n");
+ goto cleanup;
+ }
+ if (totalExtendedXMPSize != (totalXMP.size - standardXMPSize)) {
+ fprintf(stderr, "XMP extraction failed: extended XMP total size mismatch\n");
+ goto cleanup;
+ }
+ } else {
+ memcpy(extendedXMPGUID, guid, AVIF_EXTENDED_XMP_GUID_LENGTH);
+
+ avifRWDataRealloc(&totalXMP, (size_t)standardXMPSize + totalExtendedXMPSize);
+ memcpy(totalXMP.data, standardXMPData, standardXMPSize);
+
+ // Keep track of the bytes that were set.
+ avifRWDataRealloc(&extendedXMPReadBytes, totalExtendedXMPSize);
+ memset(extendedXMPReadBytes.data, 0, extendedXMPReadBytes.size);
+
+ foundExtendedXMP = AVIF_TRUE;
+ }
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "A robust JPEG reader should tolerate the marker segments in any order."
+ memcpy(&totalXMP.data[standardXMPSize + extendedXMPOffset], &marker->data[AVIF_OFFSET_TILL_EXTENDED_XMP], extendedXMPSize);
+
+ // Make sure no previously read data was overwritten by the current segment.
+ if (memchr(&extendedXMPReadBytes.data[extendedXMPOffset], 1, extendedXMPSize)) {
+ fprintf(stderr, "XMP extraction failed: overlapping extended XMP segments\n");
+ goto cleanup;
+ }
+ // Keep track of the bytes that were set.
+ memset(&extendedXMPReadBytes.data[extendedXMPOffset], 1, extendedXMPSize);
+ }
+ }
+
+ if (foundExtendedXMP) {
+ // Make sure there is no missing byte.
+ if (memchr(extendedXMPReadBytes.data, 0, extendedXMPReadBytes.size)) {
+ fprintf(stderr, "XMP extraction failed: missing extended XMP segments\n");
goto cleanup;
}
+
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "A reader must incorporate only ExtendedXMP blocks whose GUID matches the value of xmpNote:HasExtendedXMP."
+ uint8_t xmpNote[AVIF_XMP_NOTE_TAG_LENGTH + AVIF_EXTENDED_XMP_GUID_LENGTH];
+ memcpy(xmpNote, AVIF_XMP_NOTE_TAG, AVIF_XMP_NOTE_TAG_LENGTH);
+ memcpy(xmpNote + AVIF_XMP_NOTE_TAG_LENGTH, extendedXMPGUID, AVIF_EXTENDED_XMP_GUID_LENGTH);
+ if (!avifJPEGFindSubstr(standardXMPData, standardXMPSize, xmpNote, sizeof(xmpNote))) {
+ // Try the alternative before returning an error.
+ uint8_t alternativeXmpNote[AVIF_ALTERNATIVE_XMP_NOTE_TAG_LENGTH + AVIF_EXTENDED_XMP_GUID_LENGTH];
+ memcpy(alternativeXmpNote, AVIF_ALTERNATIVE_XMP_NOTE_TAG, AVIF_ALTERNATIVE_XMP_NOTE_TAG_LENGTH);
+ memcpy(alternativeXmpNote + AVIF_ALTERNATIVE_XMP_NOTE_TAG_LENGTH, extendedXMPGUID, AVIF_EXTENDED_XMP_GUID_LENGTH);
+ if (!avifJPEGFindSubstr(standardXMPData, standardXMPSize, alternativeXmpNote, sizeof(alternativeXmpNote))) {
+ fprintf(stderr, "XMP extraction failed: standard and extended XMP GUID mismatch\n");
+ goto cleanup;
+ }
+ }
+
+ // According to Adobe XMP Specification Part 3 section 1.1.3.1:
+ // "A JPEG reader must [...] remove the xmpNote:HasExtendedXMP property."
+ // This constraint is ignored here because leaving the xmpNote:HasExtendedXMP property is rather harmless
+ // and editing XMP metadata is quite involved.
+
+ avifRWDataFree(&avif->xmp);
+ avif->xmp = totalXMP;
+ totalXMP.data = NULL;
+ totalXMP.size = 0;
+ } else if (standardXMPData) {
+ avifImageSetMetadataXMP(avif, standardXMPData, standardXMPSize);
}
}
jpeg_finish_decompress(&cinfo);
@@ -381,6 +525,8 @@
fclose(f);
free(iccData);
avifRGBImageFreePixels(&rgb);
+ avifRWDataFree(&totalXMP);
+ avifRWDataFree(&extendedXMPReadBytes);
return ret;
}
diff --git a/tests/data/README.md b/tests/data/README.md
index b9d9420..ff23109 100644
--- a/tests/data/README.md
+++ b/tests/data/README.md
@@ -33,6 +33,25 @@
The structure can be displayed using `exiv2 -pS <file>`.
+### File [dog_exif_extended_xmp_icc.jpg](dog_exif_extended_xmp_icc.jpg)
+
+
+
+License: [same as libavif](https://github.com/AOMediaCodec/libavif/blob/main/LICENSE)
+
+Source: Personal photo.
+
+| address | marker | length | data |
+|--------:|-------------|-------:|----------------------------------------------|
+| 0 | 0xffd8 SOI | | |
+| 2 | 0xffe1 APP1 | 884 | `Exif..II*......................` |
+| 888 | 0xffe1 APP1 | 353 | `http://ns.adobe.com/xap/1.0/.<x:` |
+| 1243 | 0xffe1 APP1 | 32417 | `http://ns.adobe.com/xmp/extensio` |
+| 33662 | 0xffe0 APP0 | 16 | `JFIF.........` |
+| | | | ... |
+| 33818 | 0xffe2 APP2 | 612 | `ICC_PROFILE......T........mntrRG chunk 1/1` |
+| | | | ... |
+
### File [paris_exif_xmp_icc.jpg](paris_exif_xmp_icc.jpg)

@@ -50,6 +69,30 @@
| 5087 | 0xffe2 APP2 | 612 | `ICC_PROFILE......T........mntrRG chunk 1/1` |
| | | | ... |
+### File [paris_extended_xmp.jpg](paris_extended_xmp.jpg)
+
+
+
+License: [same as libavif](https://github.com/AOMediaCodec/libavif/blob/main/LICENSE)
+
+Source: Metadata was extracted from `paris_exif_xmp_icc.jpg` with
+`exiftool -tagsfromfile paris_exif_xmp_icc.jpg paris_exif_xmp_icc.xmp`. The text of the first book of
+[De finibus bonorum et malorum](https://en.wikipedia.org/wiki/De_finibus_bonorum_et_malorum) was manually inserted in
+that file under the tag `xmp:Label` and the second book under the tag `xmp:Nickname` (any `<` or `>` removed to avoid
+conflicts with XMP). The file was reconstructed with
+`exiftool -tagsfromfile paris_exif_xmp_icc.xmp -Exif= -icc_profile= paris_exif_xmp_icc.jpg -o paris_extended_xmp.jpg`.
+The goal is to have a large XMP blob so that it can only be stored as multiple extended XMP segments.
+
+| address | marker | length | data |
+|---------:|-------------|-------:|----------------------------------|
+| 0 | 0xffd8 SOI | | |
+| 2 | 0xffe0 APP0 | 16 | `JFIF.....,.,.` |
+| 20 | 0xffe1 APP1 | 5531 | `http://ns.adobe.com/xap/1.0/.<?x` |
+| 5553 | 0xffe1 APP1 | 65535 | `http://ns.adobe.com/xmp/extensio` |
+| 71090 | 0xffe1 APP1 | 65535 | `http://ns.adobe.com/xmp/extensio` |
+| 136627 | 0xffe1 APP1 | 4791 | `http://ns.adobe.com/xmp/extensio` |
+| | | | ... |
+
### File [paris_icc_exif_xmp.png](paris_icc_exif_xmp.png)

diff --git a/tests/data/dog_exif_extended_xmp_icc.jpg b/tests/data/dog_exif_extended_xmp_icc.jpg
new file mode 100644
index 0000000..0ec3c88
--- /dev/null
+++ b/tests/data/dog_exif_extended_xmp_icc.jpg
Binary files differ
diff --git a/tests/data/paris_extended_xmp.jpg b/tests/data/paris_extended_xmp.jpg
new file mode 100644
index 0000000..8ba5086
--- /dev/null
+++ b/tests/data/paris_extended_xmp.jpg
Binary files differ
diff --git a/tests/gtest/avifmetadatatest.cc b/tests/gtest/avifmetadatatest.cc
index 7372b97..fa4f14c 100644
--- a/tests/gtest/avifmetadatatest.cc
+++ b/tests/gtest/avifmetadatatest.cc
@@ -310,6 +310,22 @@
//------------------------------------------------------------------------------
+// Reads dog_exif_extended_xmp_icc.jpg and checks that the image loads and carries a non-empty XMP payload.
+TEST(MetadataTest, ExtendedXMP) {
+ const testutil::AvifImagePtr image =
+ testutil::ReadImage(data_path, "dog_exif_extended_xmp_icc.jpg");
+ ASSERT_NE(image, nullptr);
+ ASSERT_NE(image->xmp.size, 0u);
+}
+
+// Reads paris_extended_xmp.jpg and checks that the resulting XMP is larger than 2 * 65536 bytes,
+// i.e. larger than what two maximum-size segments could hold.
+// NOTE(review): per the test name, the file is expected to use the alternative GUID tag form - confirm against the file.
+TEST(MetadataTest, MultipleExtendedXMPAndAlternativeGUIDTag) {
+ const testutil::AvifImagePtr image =
+ testutil::ReadImage(data_path, "paris_extended_xmp.jpg");
+ ASSERT_NE(image, nullptr);
+ ASSERT_GT(image->xmp.size, size_t{65536 * 2});
+}
+
+//------------------------------------------------------------------------------
+
} // namespace
} // namespace libavif