Fix Extended XMP parsing (#2909)
diff --git a/apps/shared/avifjpeg.c b/apps/shared/avifjpeg.c index 575eeb9..300b3a8 100644 --- a/apps/shared/avifjpeg.c +++ b/apps/shared/avifjpeg.c
@@ -435,6 +435,7 @@ #define XML_NAME_SPACE_GAIN_MAP "http://ns.adobe.com/hdr-gain-map/1.0/" #define XML_NAME_SPACE_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define XML_NAME_SPACE_XMP_NOTE "http://ns.adobe.com/xmp/note/" // Finds an 'rdf:Description' node containing a gain map version attribute (hdrgm:Version="1.0"). // Returns NULL if not found. @@ -466,19 +467,12 @@ return NULL; } -// Use XML_PARSE_RECOVER and XML_PARSE_NOERROR to avoid failing/printing errors for invalid XML. -// In particular, if the jpeg files contains extended XMP, avifJPEGReadInternal simply concatenates it to -// standard XMP, which is not a valid XML tree. -// TODO(maryla): better handle extended XMP. If the gain map metadata is in the extended part, -// the current code won't detect it. -#define LIBXML2_XML_PARSING_FLAGS (XML_PARSE_RECOVER | XML_PARSE_NOERROR) - // Returns true if there is an 'rdf:Description' node containing a gain map version attribute (hdrgm:Version="1.0"). // On the main image, this signals that the file also contains a gain map. // On a subsequent image, this signals that it is a gain map. static avifBool avifJPEGHasGainMapXMPNode(const uint8_t * xmpData, size_t xmpSize) { - xmlDoc * document = xmlReadMemory((const char *)xmpData, (int)xmpSize, NULL, NULL, LIBXML2_XML_PARSING_FLAGS); + xmlDoc * document = xmlReadMemory((const char *)xmpData, (int)xmpSize, NULL, NULL, /*options=*/0); if (document == NULL) { return AVIF_FALSE; // Probably and out of memory error. } @@ -660,7 +654,7 @@ // Returns AVIF_TRUE if the gain map metadata was successfully read. avifBool avifJPEGParseGainMapXMP(const uint8_t * xmpData, size_t xmpSize, avifGainMap * gainMap) { - xmlDoc * document = xmlReadMemory((const char *)xmpData, (int)xmpSize, NULL, NULL, LIBXML2_XML_PARSING_FLAGS); + xmlDoc * document = xmlReadMemory((const char *)xmpData, (int)xmpSize, NULL, NULL, /*options=*/0); if (document == NULL) { return AVIF_FALSE; // Probably an out of memory error. } @@ -848,6 +842,117 @@ } return AVIF_FALSE; } + +// Merges the standard XMP data with the extended XMP data. +// Returns AVIF_FALSE if an error occurred. +static avifBool avifJPEGMergeXMP(const uint8_t * standardXMPData, + uint32_t standardXMPSize, + const avifRWData extendedXMP, + avifBool foundAlternativeXMPNote, + avifRWData * xmp) +{ + // Initialize the XMP RDF. + avifBool isValid = AVIF_TRUE; + xmlDoc * extendedXMPDoc = NULL; + xmlChar * xmlBuff = NULL; + xmlDoc * xmpDoc = xmlReadMemory((const char *)standardXMPData, (int)standardXMPSize, "standard.xml", NULL, /*options=*/0); + xmlNode * xmpRdf = (xmlNode *)avifJPEGFindXMLNodeByName(xmlDocGetRootElement(xmpDoc), + XML_NAME_SPACE_RDF, + "RDF", + /*recursive=*/AVIF_TRUE); + if (!xmpRdf) { + fprintf(stderr, "XMP extraction failed: cannot find RDF node\n"); + isValid = AVIF_FALSE; + goto cleanup_xml; + } + // According to Adobe XMP Specification Part 3 section 1.1.3.1: + // "A JPEG reader must [...] remove the xmpNote:HasExtendedXMP property." + avifBool foundHasExtendedXMP = AVIF_FALSE; + xmlNode * descNode = xmpRdf->children; + while (!foundHasExtendedXMP && descNode != NULL) { + if (descNode->type == XML_ELEMENT_NODE && descNode->ns != NULL && + xmlStrcmp(descNode->ns->href, (const xmlChar *)XML_NAME_SPACE_RDF) == 0 && + xmlStrcmp(descNode->name, (const xmlChar *)"Description") == 0) { + // Remove the HasExtendedXMP property. + if (foundAlternativeXMPNote) { + xmlNodePtr cur = descNode->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE && cur->ns != NULL && xmlStrcmp(cur->name, (const xmlChar *)"HasExtendedXMP") == 0 && + xmlStrcmp(cur->ns->href, (const xmlChar *)XML_NAME_SPACE_XMP_NOTE) == 0) { + // We must Unlink and Free the node. + xmlUnlinkNode(cur); + xmlFreeNode(cur); + foundHasExtendedXMP = AVIF_TRUE; + break; + } + cur = cur->next; + } + } else { + xmlAttrPtr attr = xmlHasNsProp(descNode, (const xmlChar *)"HasExtendedXMP", (const xmlChar *)XML_NAME_SPACE_XMP_NOTE); + + if (attr) { + xmlRemoveProp(attr); + foundHasExtendedXMP = AVIF_TRUE; + break; + } + } + } + // Check next sibling in case there are multiple Descriptions. + descNode = descNode->next; + } + if (!foundHasExtendedXMP) { + fprintf(stderr, "XMP extraction failed: cannot find HasExtendedXMP property\n"); + isValid = AVIF_FALSE; + goto cleanup_xml; + } + + // Read the extended XMP. + extendedXMPDoc = xmlReadMemory((const char *)extendedXMP.data, + (int)extendedXMP.size, + "extended.xml", + NULL, + /*options=*/0); + const xmlNode * extendedXMPRdf = avifJPEGFindXMLNodeByName(xmlDocGetRootElement(extendedXMPDoc), + XML_NAME_SPACE_RDF, + "RDF", + /*recursive=*/AVIF_TRUE); + if (!extendedXMPRdf) { + fprintf(stderr, "XMP extraction failed: invalid standard XMP segment\n"); + isValid = AVIF_FALSE; + goto cleanup_xml; + } + // Copy the extended nodes over. + xmlNode * cur = extendedXMPRdf->xmlChildrenNode; + while (cur != NULL) { + // Copy the child. + xmlNode * childCopy = xmlDocCopyNode(cur, xmpDoc, 1); + xmlAddChild(xmpRdf, childCopy); + cur = cur->next; + } + + // Dump the new XMP to avif->xmp. + int buffer_size; + xmlDocDumpFormatMemory(xmpDoc, &xmlBuff, &buffer_size, 1); + if (xmlBuff == NULL) { + fprintf(stderr, "Error: Could not dump XML to memory.\n"); + isValid = AVIF_FALSE; + goto cleanup_xml; + } + + avifRWDataFree(xmp); + if (avifRWDataRealloc(xmp, (size_t)buffer_size) != AVIF_RESULT_OK) { + fprintf(stderr, "XMP copy failed: out of memory\n"); + isValid = AVIF_FALSE; + goto cleanup_xml; + } + memcpy(xmp->data, xmlBuff, buffer_size); +cleanup_xml: + xmlFreeDoc(xmpDoc); + xmlFreeDoc(extendedXMPDoc); + xmlFree(xmlBuff); + return isValid; +} + #endif // AVIF_ENABLE_JPEG_GAIN_MAP_CONVERSION // Note on setjmp() and volatile variables: @@ -882,8 +987,8 @@ avifRGBImage rgb; memset(&rgb, 0, sizeof(avifRGBImage)); - // Standard XMP segment followed by all extended XMP segments. - avifRWData totalXMP = { NULL, 0 }; + // Extended XMP after concatenation of all extended XMP segments. + avifRWData extendedXMP = { NULL, 0 }; // Each byte set to 0 is a missing byte. Each byte set to 1 was read and copied to totalXMP. avifRWData extendedXMPReadBytes = { NULL, 0 }; @@ -1115,21 +1220,16 @@ fprintf(stderr, "XMP extraction failed: extended XMP segment GUID mismatch\n"); goto cleanup; } - if (totalExtendedXMPSize != (totalXMP.size - standardXMPSize)) { + if (totalExtendedXMPSize != extendedXMP.size) { fprintf(stderr, "XMP extraction failed: extended XMP total size mismatch\n"); goto cleanup; } } else { memcpy(extendedXMPGUID, guid, AVIF_JPEG_EXTENDED_XMP_GUID_LENGTH); - if (avifRWDataRealloc(&totalXMP, (size_t)standardXMPSize + totalExtendedXMPSize) != AVIF_RESULT_OK) { - fprintf(stderr, "XMP extraction failed: out of memory\n"); - goto cleanup; - } - memcpy(totalXMP.data, standardXMPData, standardXMPSize); - - // Keep track of the bytes that were set. - if (avifRWDataRealloc(&extendedXMPReadBytes, totalExtendedXMPSize) != AVIF_RESULT_OK) { + // Allocate the extended XMP and keep track of the bytes that were set. + if (avifRWDataRealloc(&extendedXMP, (size_t)totalExtendedXMPSize) != AVIF_RESULT_OK || + avifRWDataRealloc(&extendedXMPReadBytes, totalExtendedXMPSize) != AVIF_RESULT_OK) { fprintf(stderr, "XMP extraction failed: out of memory\n"); goto cleanup; } @@ -1139,7 +1239,7 @@ } // According to Adobe XMP Specification Part 3 section 1.1.3.1: // "A robust JPEG reader should tolerate the marker segments in any order." - memcpy(&totalXMP.data[standardXMPSize + extendedXMPOffset], &marker->data[AVIF_JPEG_OFFSET_TILL_EXTENDED_XMP], extendedXMPSize); + memcpy(&extendedXMP.data[extendedXMPOffset], &marker->data[AVIF_JPEG_OFFSET_TILL_EXTENDED_XMP], extendedXMPSize); // Make sure no previously read data was overwritten by the current segment. if (memchr(&extendedXMPReadBytes.data[extendedXMPOffset], 1, extendedXMPSize)) { @@ -1163,7 +1263,10 @@ uint8_t xmpNote[AVIF_JPEG_XMP_NOTE_TAG_LENGTH + AVIF_JPEG_EXTENDED_XMP_GUID_LENGTH]; memcpy(xmpNote, AVIF_JPEG_XMP_NOTE_TAG, AVIF_JPEG_XMP_NOTE_TAG_LENGTH); memcpy(xmpNote + AVIF_JPEG_XMP_NOTE_TAG_LENGTH, extendedXMPGUID, AVIF_JPEG_EXTENDED_XMP_GUID_LENGTH); - if (!avifJPEGFindSubstr(standardXMPData, standardXMPSize, xmpNote, sizeof(xmpNote))) { + avifBool foundAlternativeXMPNote; + if (avifJPEGFindSubstr(standardXMPData, standardXMPSize, xmpNote, sizeof(xmpNote))) { + foundAlternativeXMPNote = AVIF_FALSE; + } else { // Try the alternative before returning an error. uint8_t alternativeXmpNote[AVIF_JPEG_ALTERNATIVE_XMP_NOTE_TAG_LENGTH + AVIF_JPEG_EXTENDED_XMP_GUID_LENGTH]; memcpy(alternativeXmpNote, AVIF_JPEG_ALTERNATIVE_XMP_NOTE_TAG, AVIF_JPEG_ALTERNATIVE_XMP_NOTE_TAG_LENGTH); @@ -1172,17 +1275,24 @@ fprintf(stderr, "XMP extraction failed: standard and extended XMP GUID mismatch\n"); goto cleanup; } + foundAlternativeXMPNote = AVIF_TRUE; } + (void)foundAlternativeXMPNote; - // According to Adobe XMP Specification Part 3 section 1.1.3.1: - // "A JPEG reader must [...] remove the xmpNote:HasExtendedXMP property." - // This constraint is ignored here because leaving the xmpNote:HasExtendedXMP property is rather harmless - // and editing XMP metadata is quite involved. - +#if defined(AVIF_ENABLE_JPEG_GAIN_MAP_CONVERSION) + if (!avifJPEGMergeXMP(standardXMPData, standardXMPSize, extendedXMP, foundAlternativeXMPNote, &avif->xmp)) { + goto cleanup; + } +#else + fprintf(stderr, "WARNING: must be compiled with libxml2 to copy extended XMP properly\n"); avifRWDataFree(&avif->xmp); - avif->xmp = totalXMP; - totalXMP.data = NULL; - totalXMP.size = 0; + if (avifRWDataRealloc(&avif->xmp, (size_t)standardXMPSize + extendedXMP.size) != AVIF_RESULT_OK) { + fprintf(stderr, "XMP copy failed: out of memory\n"); + goto cleanup; + } + memcpy(avif->xmp.data, standardXMPData, standardXMPSize); + memcpy(avif->xmp.data + standardXMPSize, extendedXMP.data, extendedXMP.size); +#endif // AVIF_ENABLE_JPEG_GAIN_MAP_CONVERSION } else if (standardXMPData) { if (avifImageSetMetadataXMP(avif, standardXMPData, standardXMPSize) != AVIF_RESULT_OK) { fprintf(stderr, "XMP extraction failed: out of memory\n"); @@ -1236,7 +1346,7 @@ jpeg_destroy_decompress(&cinfo); free(iccData); avifRGBImageFreePixels(&rgb); - avifRWDataFree(&totalXMP); + avifRWDataFree(&extendedXMP); avifRWDataFree(&extendedXMPReadBytes); return ret; }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dab2c74..96010ee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt
@@ -266,6 +266,8 @@ if(AVIF_ENABLE_JPEG_GAIN_MAP_CONVERSION) add_cmd_test(test_cmd_avifgainmaputil ${CMAKE_CURRENT_SOURCE_DIR}/data) + else() + set_tests_properties(test_cmd_stdin PROPERTIES DISABLED True) endif() if(AVIF_ENABLE_GOLDEN_TESTS AND AVIF_CODEC_AOM_ENCODE)
diff --git a/tests/gtest/avifjpeggainmaptest.cc b/tests/gtest/avifjpeggainmaptest.cc index 5f729f9..a86e0f5 100644 --- a/tests/gtest/avifjpeggainmaptest.cc +++ b/tests/gtest/avifjpeggainmaptest.cc
@@ -201,6 +201,10 @@ <?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?> <x:xmpmeta xmlns:x="adobe:ns:meta/"> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <rdf:Description rdf:about="stuff" xmlns:xmpNote="http://ns.adobe.com/xmp/note/" + xmpNote:HasExtendedXMP="F280A6636D6DC3D306B925078C2924D3"> + <stuff></stuff> + </rdf:Description> <rdf:Description rdf:about="stuff" xmlns:hdrgm="http://ns.adobe.com/hdr-gain-map/1.0/" hdrgm:Version="1.0" hdrgm:BaseRenditionIsHDR="False" @@ -209,16 +213,6 @@ </rdf:Description> </rdf:RDF> </x:xmpmeta> -<?xpacket end="w"?> - -<x:xmpmeta xmlns:x="adobe:ns:meta/"> - <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <!-- Imagine this is some extended xmp that avifenc concatenated to - the main XMP. As a result we have invalid XMP but should still be - able to parse it. --> - <stuff></stuff> - </rdf:RDF> -</x:xmpmeta> )"; GainMapPtr gainMap(avifGainMapCreate()); ASSERT_TRUE(avifJPEGParseGainMapXMP((const uint8_t*)xmp.data(), xmp.size(),