Merge "Harmonize and fix coeff context computation" into nextgenv2
diff --git a/LICENSE b/LICENSE
index 1ce4434..fc340c3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,31 +1,27 @@
-Copyright (c) 2010, The WebM Project authors. All rights reserved.
+Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
+modification, are permitted provided that the following conditions
+are met:
 
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
 
-  * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
-
-  * Neither the name of Google, nor the WebM Project, nor the names
-    of its contributors may be used to endorse or promote products
-    derived from this software without specific prior written
-    permission.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
 
diff --git a/PATENTS b/PATENTS
index caedf60..be491f5 100644
--- a/PATENTS
+++ b/PATENTS
@@ -1,23 +1,108 @@
-Additional IP Rights Grant (Patents)
-------------------------------------
+Alliance for Open Media Patent License 1.0
 
-"These implementations" means the copyrightable works that implement the WebM
-codecs distributed by Google as part of the WebM Project.
+1. License Terms.
 
-Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
-royalty-free, irrevocable (except as stated in this section) patent license to
-make, have made, use, offer to sell, sell, import, transfer, and otherwise
-run, modify and propagate the contents of these implementations of WebM, where
-such license applies only to those patent claims, both currently owned by
-Google and acquired in the future, licensable by Google that are necessarily
-infringed by these implementations of WebM. This grant does not include claims
-that would be infringed only as a consequence of further modification of these
-implementations. If you or your agent or exclusive licensee institute or order
-or agree to the institution of patent litigation or any other patent
-enforcement activity against any entity (including a cross-claim or
-counterclaim in a lawsuit) alleging that any of these implementations of WebM
-or any code incorporated within any of these implementations of WebM
-constitute direct or contributory patent infringement, or inducement of
-patent infringement, then any patent rights granted to you under this License
-for these implementations of WebM shall terminate as of the date such
-litigation is filed.
+1.1. Patent License. Subject to the terms and conditions of this License, each
+     Licensor, on behalf of itself and successors in interest and assigns,
+     grants Licensee a non-sublicensable, perpetual, worldwide, non-exclusive,
+     no-charge, royalty-free, irrevocable (except as expressly stated in this
+     License) patent license to its Necessary Claims to make, use, sell, offer
+     for sale, import or distribute any Implementation.
+
+1.2. Conditions.
+
+1.2.1. Availability. As a condition to the grant of rights to Licensee to make,
+       sell, offer for sale, import or distribute an Implementation under
+       Section 1.1, Licensee must make its Necessary Claims available under
+       this License, and must reproduce this License with any Implementation
+       as follows:
+
+       a. For distribution in source code, by including this License in the
+          root directory of the source code with its Implementation.
+
+       b. For distribution in any other form (including binary, object form,
+          and/or hardware description code (e.g., HDL, RTL, Gate Level Netlist,
+          GDSII, etc.)), by including this License in the documentation, legal
+          notices, and/or other written materials provided with the
+          Implementation.
+
+1.2.2. Additional Conditions. This license is directly from Licensor to
+       Licensee.  Licensee acknowledges as a condition of benefiting from it
+       that no rights from Licensor are received from suppliers, distributors,
+       or otherwise in connection with this License.
+
+1.3. Defensive Termination. If any Licensee, its Affiliates, or its agents
+     initiates patent litigation or files, maintains, or voluntarily
+     participates in a lawsuit against another entity or any person asserting
+     that any Implementation infringes Necessary Claims, any patent licenses
+     granted under this License directly to the Licensee are immediately
+     terminated as of the date of the initiation of action unless 1) that suit
+     was in response to a corresponding suit regarding an Implementation first
+     brought against an initiating entity, or 2) that suit was brought to
+     enforce the terms of this License (including intervention in a third-party
+     action by a Licensee).
+
+1.4. Disclaimers. The Reference Implementation and Specification are provided
+     "AS IS" and without warranty. The entire risk as to implementing or
+     otherwise using the Reference Implementation or Specification is assumed
+     by the implementer and user. Licensor expressly disclaims any warranties
+     (express, implied, or otherwise), including implied warranties of
+     merchantability, non-infringement, fitness for a particular purpose, or
+     title, related to the material. IN NO EVENT WILL LICENSOR BE LIABLE TO
+     ANY OTHER PARTY FOR LOST PROFITS OR ANY FORM OF INDIRECT, SPECIAL,
+     INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER FROM ANY CAUSES OF
+     ACTION OF ANY KIND WITH RESPECT TO THIS LICENSE, WHETHER BASED ON BREACH
+     OF CONTRACT, TORT (INCLUDING NEGLIGENCE), OR OTHERWISE, AND WHETHER OR
+     NOT THE OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+2. Definitions.
+
+2.1. Affiliate.  “Affiliate” means an entity that directly or indirectly
+     Controls, is Controlled by, or is under common Control of that party.
+
+2.2. Control. “Control” means direct or indirect control of more than 50% of
+     the voting power to elect directors of that corporation, or for any other
+     entity, the power to direct management of such entity.
+
+2.3. Decoder.  "Decoder" means any decoder that conforms fully with all
+     non-optional portions of the Specification.
+
+2.4. Encoder.  "Encoder" means any encoder that produces a bitstream that can
+     be decoded by a Decoder only to the extent it produces such a bitstream.
+
+2.5. Final Deliverable.  “Final Deliverable” means the final version of a
+     deliverable approved by the Alliance for Open Media as a Final
+     Deliverable.
+
+2.6. Implementation.  "Implementation" means any implementation, including the
+     Reference Implementation, that is an Encoder and/or a Decoder. An
+     Implementation also includes components of an Implementation only to the
+     extent they are used as part of an Implementation.
+
+2.7. License. “License” means this license.
+
+2.8. Licensee. “Licensee” means any person or entity who exercises patent
+     rights granted under this License.
+
+2.9. Licensor.  "Licensor" means (i) any Licensee that makes, sells, offers
+     for sale, imports or distributes any Implementation, or (ii) a person
+     or entity that has a licensing obligation to the Implementation as a
+     result of its membership and/or participation in the Alliance for Open
+     Media working group that developed the Specification.
+
+2.10. Necessary Claims.  "Necessary Claims" means all claims of patents or
+      patent applications, (a) that currently or at any time in the future,
+      are owned or controlled by the Licensor, and (b) (i) would be an
+      Essential Claim as defined by the W3C Policy as of February 5, 2004
+      (https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential)
+      as if the Specification was a W3C Recommendation; or (ii) are infringed
+      by the Reference Implementation.
+
+2.11. Reference Implementation. “Reference Implementation” means an Encoder
+      and/or Decoder released by the Alliance for Open Media as a Final
+      Deliverable.
+
+2.12. Specification. “Specification” means the specification designated by
+      the Alliance for Open Media as a Final Deliverable for which this
+      License was issued.
+
diff --git a/aom/internal/aom_codec_internal.h b/aom/internal/aom_codec_internal.h
index fc758b8..4399279 100644
--- a/aom/internal/aom_codec_internal.h
+++ b/aom/internal/aom_codec_internal.h
@@ -43,6 +43,7 @@
  */
 #ifndef AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
 #define AOM_INTERNAL_AOM_CODEC_INTERNAL_H_
+#include "./aom_config.h"
 #include "../aom_decoder.h"
 #include "../aom_encoder.h"
 #include <stdarg.h>
@@ -439,6 +440,24 @@
                         aom_codec_err_t error, const char *fmt,
                         ...) CLANG_ANALYZER_NORETURN;
 
+#if CONFIG_DEBUG
+#define AOM_CHECK_MEM_ERROR(error_info, lval, expr)                         \
+  do {                                                                      \
+    lval = (expr);                                                          \
+    if (!lval)                                                              \
+      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,                   \
+                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
+                         __LINE__);                                         \
+  } while (0)
+#else
+#define AOM_CHECK_MEM_ERROR(error_info, lval, expr)       \
+  do {                                                    \
+    lval = (expr);                                        \
+    if (!lval)                                            \
+      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, \
+                         "Failed to allocate " #lval);    \
+  } while (0)
+#endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/aom_dsp/x86/sad_avx2.c b/aom_dsp/x86/sad_avx2.c
index bfa734d..6ce61fd 100644
--- a/aom_dsp/x86/sad_avx2.c
+++ b/aom_dsp/x86/sad_avx2.c
@@ -81,8 +81,10 @@
   FSAD32_H(32); \
   FSAD32_H(16);
 
-FSAD64;
-FSAD32;
+/* clang-format off */
+FSAD64
+FSAD32
+/* clang-format on */
 
 #undef FSAD64
 #undef FSAD32
@@ -170,8 +172,10 @@
   FSADAVG32_H(32); \
   FSADAVG32_H(16);
 
-FSADAVG64;
-FSADAVG32;
+/* clang-format off */
+FSADAVG64
+FSADAVG32
+/* clang-format on */
 
 #undef FSADAVG64
 #undef FSADAVG32
diff --git a/aom_mem/aom_mem.c b/aom_mem/aom_mem.c
index 43f8237..144085f 100644
--- a/aom_mem/aom_mem.c
+++ b/aom_mem/aom_mem.c
@@ -18,35 +18,47 @@
 #include "include/aom_mem_intrnl.h"
 #include "aom/aom_integer.h"
 
+static size_t GetAlignedMallocSize(size_t size, size_t align) {
+  return size + align - 1 + ADDRESS_STORAGE_SIZE;
+}
+
+static size_t *GetMallocAddressLocation(void *const mem) {
+  return ((size_t *)mem) - 1;
+}
+
+static void SetActualMallocAddress(void *const mem,
+                                   const void *const malloc_addr) {
+  size_t *const malloc_addr_location = GetMallocAddressLocation(mem);
+  *malloc_addr_location = (size_t)malloc_addr;
+}
+
+static void *GetActualMallocAddress(void *const mem) {
+  const size_t *const malloc_addr_location = GetMallocAddressLocation(mem);
+  return (void *)(*malloc_addr_location);
+}
+
 void *aom_memalign(size_t align, size_t size) {
-  void *addr, *x = NULL;
-
-  addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE);
-
+  void *x = NULL;
+  const size_t aligned_size = GetAlignedMallocSize(size, align);
+  void *const addr = malloc(aligned_size);
   if (addr) {
     x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
-    /* save the actual malloc address */
-    ((size_t *)x)[-1] = (size_t)addr;
+    SetActualMallocAddress(x, addr);
   }
-
   return x;
 }
 
 void *aom_malloc(size_t size) { return aom_memalign(DEFAULT_ALIGNMENT, size); }
 
 void *aom_calloc(size_t num, size_t size) {
-  void *x;
-
-  x = aom_memalign(DEFAULT_ALIGNMENT, num * size);
-
-  if (x) memset(x, 0, num * size);
-
+  const size_t total_size = num * size;
+  void *const x = aom_malloc(total_size);
+  if (x) memset(x, 0, total_size);
   return x;
 }
 
 void *aom_realloc(void *memblk, size_t size) {
-  void *addr, *new_addr = NULL;
-  int align = DEFAULT_ALIGNMENT;
+  void *new_addr = NULL;
 
   /*
   The realloc() function changes the size of the object pointed to by
@@ -61,19 +73,14 @@
   else if (!size)
     aom_free(memblk);
   else {
-    addr = (void *)(((size_t *)memblk)[-1]);
+    void *addr = GetActualMallocAddress(memblk);
+    const size_t aligned_size = GetAlignedMallocSize(size, DEFAULT_ALIGNMENT);
     memblk = NULL;
-
-    new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE);
-
-    if (new_addr) {
-      addr = new_addr;
-      new_addr =
-          (void *)(((size_t)((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) +
-                    (align - 1)) &
-                   (size_t)-align);
-      /* save the actual malloc address */
-      ((size_t *)new_addr)[-1] = (size_t)addr;
+    addr = realloc(addr, aligned_size);
+    if (addr) {
+      new_addr = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE,
+                            DEFAULT_ALIGNMENT);
+      SetActualMallocAddress(new_addr, addr);
     }
   }
 
@@ -82,7 +89,7 @@
 
 void aom_free(void *memblk) {
   if (memblk) {
-    void *addr = (void *)(((size_t *)memblk)[-1]);
+    void *addr = GetActualMallocAddress(memblk);
     free(addr);
   }
 }
diff --git a/av1/common/common.h b/av1/common/common.h
index 5b8100b..551055a 100644
--- a/av1/common/common.h
+++ b/av1/common/common.h
@@ -16,7 +16,6 @@
 
 #include <assert.h>
 
-#include "./aom_config.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_mem/aom_mem.h"
 #include "aom/aom_integer.h"
@@ -49,24 +48,8 @@
   return num_values > 0 ? get_msb(num_values) + 1 : 0;
 }
 
-#if CONFIG_DEBUG
-#define CHECK_MEM_ERROR(cm, lval, expr)                                     \
-  do {                                                                      \
-    lval = (expr);                                                          \
-    if (!lval)                                                              \
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,                   \
-                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
-                         __LINE__);                                         \
-  } while (0)
-#else
-#define CHECK_MEM_ERROR(cm, lval, expr)                   \
-  do {                                                    \
-    lval = (expr);                                        \
-    if (!lval)                                            \
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, \
-                         "Failed to allocate " #lval);    \
-  } while (0)
-#endif
+#define CHECK_MEM_ERROR(cm, lval, expr) \
+  AOM_CHECK_MEM_ERROR(&cm->error, lval, expr)
 // TODO(yaowu: validate the usage of these codes or develop new ones.)
 #define AV1_SYNC_CODE_0 0x49
 #define AV1_SYNC_CODE_1 0x83
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index b2cb6ff..e1593e3 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -857,6 +857,12 @@
       },
     };
 
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_VAR_TX
+// the probability of (0) using recursive square tx partition vs.
+// (1) biggest rect tx for 4X8-8X4/8X16-16X8/16X32-32X16 blocks
+static const aom_prob default_rect_tx_prob[TX_SIZES - 1] = { 192, 192, 192 };
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_VAR_TX
+
 int av1_get_palette_color_context(const uint8_t *color_map, int cols, int r,
                                   int c, int n, int *color_order) {
   int i, j, max, max_idx, temp;
@@ -1298,6 +1304,9 @@
   av1_copy(fc->tx_size_probs, default_tx_size_prob);
 #if CONFIG_VAR_TX
   av1_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  av1_copy(fc->rect_tx_prob, default_rect_tx_prob);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #endif
   av1_copy(fc->skip_probs, default_skip_probs);
 #if CONFIG_REF_MV
@@ -1451,6 +1460,15 @@
   }
 #endif  // CONFIG_EXT_INTER
 
+#if CONFIG_VAR_TX && CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 0; i < TX_SIZES - 1; ++i) {
+      fc->rect_tx_prob[i] =
+          av1_mode_mv_merge_probs(pre_fc->rect_tx_prob[i], counts->rect_tx[i]);
+    }
+  }
+#endif  // CONFIG_VAR_TX && CONFIG_EXT_TX && CONFIG_RECT_TX
+
   for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
     aom_tree_merge_probs(av1_intra_mode_tree, pre_fc->y_mode_prob[i],
                          counts->y_mode[i], fc->y_mode_prob[i]);
@@ -1478,10 +1496,11 @@
   }
 
 #if CONFIG_VAR_TX
-  if (cm->tx_mode == TX_MODE_SELECT)
+  if (cm->tx_mode == TX_MODE_SELECT) {
     for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
       fc->txfm_partition_prob[i] = av1_mode_mv_merge_probs(
           pre_fc->txfm_partition_prob[i], counts->txfm_partition[i]);
+  }
 #endif
 
   for (i = 0; i < SKIP_CONTEXTS; ++i)
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 956bc99..7968484 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -96,6 +96,10 @@
   aom_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
 #if CONFIG_VAR_TX
   aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  // TODO(yuec) make this flag harmonize with the original syntax
+  aom_prob rect_tx_prob[TX_SIZES - 1];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #endif
   aom_prob skip_probs[SKIP_CONTEXTS];
 #if CONFIG_REF_MV
@@ -179,6 +183,9 @@
   unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
 #if CONFIG_VAR_TX
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  unsigned int rect_tx[TX_SIZES - 1][2];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #endif
   unsigned int skip[SKIP_CONTEXTS][2];
 #if CONFIG_REF_MV
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index 7086be8..2147bb8 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -1246,12 +1246,18 @@
         tx_size_mask = 0;
 
 #if CONFIG_VAR_TX
-      if (is_inter_block(mbmi) && !mbmi->skip)
-        tx_size =
-            (plane->plane_type == PLANE_TYPE_UV)
-                ? uv_txsize_lookup[sb_type][mbmi->inter_tx_size
-                                                [blk_row][blk_col]][ss_x][ss_y]
-                : mbmi->inter_tx_size[blk_row][blk_col];
+      if (is_inter_block(mbmi) && !mbmi->skip) {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+        TX_SIZE mb_tx_size = is_rect_tx(mbmi->tx_size)
+                                 ? mbmi->tx_size
+                                 : mbmi->inter_tx_size[blk_row][blk_col];
+#else
+        TX_SIZE mb_tx_size = mbmi->inter_tx_size[blk_row][blk_col];
+#endif
+        tx_size = (plane->plane_type == PLANE_TYPE_UV)
+                      ? uv_txsize_lookup[sb_type][mb_tx_size][ss_x][ss_y]
+                      : mb_tx_size;
+      }
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       tx_size_r =
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
index 4c5b9ae..5324cff 100644
--- a/av1/common/odintrin.h
+++ b/av1/common/odintrin.h
@@ -16,6 +16,10 @@
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_ports/bitops.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /*Smallest blocks are 4x4*/
 #define OD_LOG_BSIZE0 (2)
 /*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
@@ -55,4 +59,8 @@
   We define a special version of the macro to use when x can be zero.*/
 #define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
 
+#ifdef __cplusplus
+}  // extern "C"
 #endif
+
+#endif  // AV1_COMMON_ODINTRIN_H_
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 2ff1fce..146a14d 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -477,7 +477,7 @@
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
   int gm_err = 0, no_gm_err = 0;
-  int gm_sumerr = 0, no_gm_sumerr = 0;
+  int64_t gm_sumerr = 0, no_gm_sumerr = 0;
   for (i = p_row; i < p_row + p_height; ++i) {
     for (j = p_col; j < p_col + p_width; ++j) {
       int in[2], out[2];
@@ -492,8 +492,8 @@
                                        stride, bd);
       no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                   ref[(j - p_col) + (i - p_row) * stride];
-      gm_sumerr += gm_err * gm_err;
-      no_gm_sumerr += no_gm_err * no_gm_err;
+      gm_sumerr += (int64_t)gm_err * gm_err;
+      no_gm_sumerr += (int64_t)no_gm_err * no_gm_err;
     }
   }
   return (double)gm_sumerr / no_gm_sumerr;
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 2daf4bf..8d3202c 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1308,10 +1308,9 @@
         const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
         int block = 0;
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-        const TX_SIZE tx_size =
-            plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
-
-        if (tx_size >= TX_SIZES) {  // rect txsize is used
+        if (is_rect_tx(mbmi->tx_size)) {
+          const TX_SIZE tx_size =
+              plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
           const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
           const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
           const int max_blocks_wide =
@@ -3491,6 +3490,12 @@
 #if CONFIG_VAR_TX
   for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
     av1_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 1; i < TX_SIZES - 1; ++i)
+      av1_diff_update_prob(&r, &fc->rect_tx_prob[i]);
+  }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #endif
 
   for (k = 0; k < SKIP_CONTEXTS; ++k)
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 316d317..535c5b5 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1632,13 +1632,34 @@
       const int width = num_4x4_blocks_wide_lookup[bsize];
       const int height = num_4x4_blocks_high_lookup[bsize];
       int idx, idy;
-      for (idy = 0; idy < height; idy += bs)
-        for (idx = 0; idx < width; idx += bs)
-          read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, idy, idx,
-                             r);
+      int tx_size_cat = inter_tx_size_cat_lookup[bsize];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      int is_rect_tx_allowed = inter_block && is_rect_tx_allowed_bsize(bsize);
+      int use_rect_tx = 0;
+
+      if (is_rect_tx_allowed) {
+        use_rect_tx = aom_read(r, cm->fc->rect_tx_prob[tx_size_cat]);
+        if (xd->counts) {
+          ++xd->counts->rect_tx[tx_size_cat][use_rect_tx];
+        }
+      }
+
+      if (use_rect_tx) {
+        mbmi->tx_size = max_txsize_rect_lookup[bsize];
+        set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+      } else {
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+        for (idy = 0; idy < height; idy += bs)
+          for (idx = 0; idx < width; idx += bs)
+            read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, idy, idx,
+                               r);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      }
+#endif
       if (xd->counts) {
         const int ctx = get_tx_size_context(xd);
-        ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][mbmi->tx_size];
+        ++xd->counts
+              ->tx_size[tx_size_cat][ctx][txsize_sqr_up_map[mbmi->tx_size]];
       }
     } else {
       if (inter_block)
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 3bd5adc..7c33aea 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1121,9 +1121,25 @@
       const int width = num_4x4_blocks_wide_lookup[bsize];
       const int height = num_4x4_blocks_high_lookup[bsize];
       int idx, idy;
-      for (idy = 0; idy < height; idy += bs)
-        for (idx = 0; idx < width; idx += bs)
-          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, idy, idx, w);
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      if (is_rect_tx_allowed(mbmi)) {
+        int tx_size_cat = inter_tx_size_cat_lookup[bsize];
+
+        aom_write(w, is_rect_tx(mbmi->tx_size),
+                  cm->fc->rect_tx_prob[tx_size_cat]);
+      }
+
+      if (is_rect_tx(mbmi->tx_size)) {
+        set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+      } else {
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+        for (idy = 0; idy < height; idy += bs)
+          for (idx = 0; idx < width; idx += bs)
+            write_tx_size_vartx(cm, xd, mbmi, max_tx_size, idy, idx, w);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
     } else {
       set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
       write_selected_tx_size(cm, xd, w);
@@ -1650,7 +1666,7 @@
       TX_SIZE tx_size =
           plane ? get_uv_tx_size(mbmi, &xd->plane[plane]) : mbmi->tx_size;
 
-      if (is_inter_block(mbmi) && tx_size < TX_SIZES) {
+      if (is_inter_block(mbmi) && !is_rect_tx(tx_size)) {
 #else
       if (is_inter_block(mbmi)) {
 #endif
@@ -3271,6 +3287,13 @@
 
 #if CONFIG_VAR_TX
   update_txfm_partition_probs(cm, header_bc, counts);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 1; i < TX_SIZES - 1; ++i)
+      av1_cond_prob_diff_update(header_bc, &fc->rect_tx_prob[i],
+                                counts->rect_tx[i]);
+  }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #endif
 
   update_skip_probs(cm, header_bc, counts);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f172fbb..200b02c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5031,7 +5031,7 @@
     av1_encode_sb(x, AOMMAX(bsize, BLOCK_8X8));
 #if CONFIG_VAR_TX
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-    if (mbmi->tx_size >= TX_SIZES)
+    if (is_rect_tx(mbmi->tx_size))
       av1_tokenize_sb(cpi, td, t, !output_enabled, AOMMAX(bsize, BLOCK_8X8));
     else
 #endif
@@ -5054,8 +5054,17 @@
       assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(mbmi)));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #if CONFIG_VAR_TX
-      if (is_inter)
-        tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      if (is_rect_tx_allowed(mbmi)) {
+        td->counts->rect_tx[tx_size_cat][is_rect_tx(mbmi->tx_size)]++;
+      }
+      if (!is_rect_tx_allowed(mbmi) || !is_rect_tx(mbmi->tx_size)) {
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+        if (is_inter)
+          tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+      }
+#endif
 #endif
       ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
     } else {
@@ -5118,6 +5127,11 @@
       is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) {
     if (!output_enabled)
       tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+    if (is_rect_tx(mbmi->tx_size)) {
+      set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+    }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
   } else {
     TX_SIZE tx_size;
     // The new intra coding scheme requires no change of transform size
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 5a21ebd..718bddb 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -791,7 +791,7 @@
 #endif
 
 #if CONFIG_VAR_TX
-  // Assert not magic number (uninitialised).
+  // Assert not magic number (uninitialized).
   assert(x->blk_skip[plane][(blk_row << bwl) + blk_col] != 234);
 
   if (x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
@@ -976,9 +976,6 @@
     int idx, idy;
     int block = 0;
     int step = num_4x4_blocks_txsize_lookup[max_tx_size];
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
-    const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
-#endif
     av1_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
 #else
     const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -991,7 +988,7 @@
 
 #if CONFIG_VAR_TX
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-    if (tx_size >= TX_SIZES) {
+    if (is_rect_tx(mbmi->tx_size)) {
       av1_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                              &arg);
     } else {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c2e8275..65e1f3c 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2526,9 +2526,8 @@
           SNPRINT2(results, "\t%7.3f", consistency);
           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
         }
-
-        fprintf(f, "%s\t    Time  Rc-Err Abs Err\n", headings);
-        fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results, total_encode_time,
+        fprintf(f, "%s\t    Time\tRcErr\tAbsErr\n", headings);
+        fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time,
                 rate_err, fabs(rate_err));
       }
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 4d28bf0..f27a100 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2766,6 +2766,61 @@
   return best_rd;
 }
 
+// Return value 0: early termination triggered, no valid rd cost available;
+//              1: rd cost values are valid.
+static int super_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int64_t *distortion, int *skippable, int64_t *sse,
+                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+  int plane;
+  int pnrate = 0, pnskip = 1;
+  int64_t pndist = 0, pnsse = 0;
+  int is_cost_valid = 1;
+
+  if (ref_best_rd < 0) is_cost_valid = 0;
+
+  if (is_inter_block(mbmi) && is_cost_valid) {
+    int plane;
+    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+      av1_subtract_plane(x, bsize, plane);
+  }
+
+  *rate = 0;
+  *distortion = 0;
+  *sse = 0;
+  *skippable = 1;
+
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+    txfm_rd_in_plane(x, cpi, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
+                     plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
+    if (pnrate == INT_MAX) {
+      is_cost_valid = 0;
+      break;
+    }
+    *rate += pnrate;
+    *distortion += pndist;
+    *sse += pnsse;
+    *skippable &= pnskip;
+    if (RDCOST(x->rdmult, x->rddiv, *rate, *distortion) > ref_best_rd &&
+        RDCOST(x->rdmult, x->rddiv, 0, *sse) > ref_best_rd) {
+      is_cost_valid = 0;
+      break;
+    }
+  }
+
+  if (!is_cost_valid) {
+    // reset cost value
+    *rate = INT_MAX;
+    *distortion = INT64_MAX;
+    *sse = INT64_MAX;
+    *skippable = 0;
+  }
+
+  return is_cost_valid;
+}
+
 #if CONFIG_VAR_TX
 void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                        int blk_row, int blk_col, int plane, int block,
@@ -3148,6 +3203,61 @@
 
   mbmi->tx_type = tx_type;
   inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (is_rect_tx_allowed(mbmi)) {
+    int rate_rect_tx, skippable_rect_tx = 0;
+    int64_t dist_rect_tx, sse_rect_tx, rd, rd_rect_tx;
+    int tx_size_cat = inter_tx_size_cat_lookup[bsize];
+    TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
+    TX_SIZE var_tx_size = mbmi->tx_size;
+
+    txfm_rd_in_plane(x, cpi, &rate_rect_tx, &dist_rect_tx, &skippable_rect_tx,
+                     &sse_rect_tx, ref_best_rd, 0, bsize, tx_size,
+                     cpi->sf.use_fast_coef_costing);
+
+    if (*rate != INT_MAX) {
+      *rate += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 0);
+      if (*skippable) {
+        rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+      } else {
+        rd = RDCOST(x->rdmult, x->rddiv, *rate + s0, *dist);
+        if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+            !(*skippable))
+          rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+      }
+    } else {
+      rd = INT64_MAX;
+    }
+
+    if (rate_rect_tx != INT_MAX) {
+      rate_rect_tx += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 1);
+      if (skippable_rect_tx) {
+        rd_rect_tx = RDCOST(x->rdmult, x->rddiv, s1, sse_rect_tx);
+      } else {
+        rd_rect_tx =
+            RDCOST(x->rdmult, x->rddiv, rate_rect_tx + s0, dist_rect_tx);
+        if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+            !(skippable_rect_tx))
+          rd_rect_tx =
+              AOMMIN(rd_rect_tx, RDCOST(x->rdmult, x->rddiv, s1, sse_rect_tx));
+      }
+    } else {
+      rd_rect_tx = INT64_MAX;
+    }
+
+    if (rd_rect_tx < rd) {
+      *rate = rate_rect_tx;
+      *dist = dist_rect_tx;
+      *sse = sse_rect_tx;
+      *skippable = skippable_rect_tx;
+      if (!xd->lossless[mbmi->segment_id]) x->blk_skip[0][0] = *skippable;
+      mbmi->tx_size = tx_size;
+      mbmi->inter_tx_size[0][0] = mbmi->tx_size;
+    } else {
+      mbmi->tx_size = var_tx_size;
+    }
+  }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 
   if (*rate == INT_MAX) return INT64_MAX;
 
@@ -3361,17 +3471,24 @@
 
   if (ref_best_rd < 0) is_cost_valid = 0;
 
+  *rate = 0;
+  *distortion = 0;
+  *sse = 0;
+  *skippable = 1;
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  if (is_rect_tx(mbmi->tx_size)) {
+    return super_block_uvrd(cpi, x, rate, distortion, skippable, sse, bsize,
+                            ref_best_rd);
+  }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+
   if (is_inter_block(mbmi) && is_cost_valid) {
     int plane;
     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
       av1_subtract_plane(x, bsize, plane);
   }
 
-  *rate = 0;
-  *distortion = 0;
-  *sse = 0;
-  *skippable = 1;
-
   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
@@ -3429,61 +3546,6 @@
 }
 #endif  // CONFIG_VAR_TX
 
-// Return value 0: early termination triggered, no valid rd cost available;
-//              1: rd cost values are valid.
-static int super_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                            int64_t *distortion, int *skippable, int64_t *sse,
-                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
-  int plane;
-  int pnrate = 0, pnskip = 1;
-  int64_t pndist = 0, pnsse = 0;
-  int is_cost_valid = 1;
-
-  if (ref_best_rd < 0) is_cost_valid = 0;
-
-  if (is_inter_block(mbmi) && is_cost_valid) {
-    int plane;
-    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
-      av1_subtract_plane(x, bsize, plane);
-  }
-
-  *rate = 0;
-  *distortion = 0;
-  *sse = 0;
-  *skippable = 1;
-
-  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
-    txfm_rd_in_plane(x, cpi, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
-                     plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
-    if (pnrate == INT_MAX) {
-      is_cost_valid = 0;
-      break;
-    }
-    *rate += pnrate;
-    *distortion += pndist;
-    *sse += pnsse;
-    *skippable &= pnskip;
-    if (RDCOST(x->rdmult, x->rddiv, *rate, *distortion) > ref_best_rd &&
-        RDCOST(x->rdmult, x->rddiv, 0, *sse) > ref_best_rd) {
-      is_cost_valid = 0;
-      break;
-    }
-  }
-
-  if (!is_cost_valid) {
-    // reset cost value
-    *rate = INT_MAX;
-    *distortion = INT64_MAX;
-    *sse = INT64_MAX;
-    *skippable = 0;
-  }
-
-  return is_cost_valid;
-}
-
 static void rd_pick_palette_intra_sbuv(
     AV1_COMP *cpi, MACROBLOCK *x, int dc_mode_cost,
     PALETTE_MODE_INFO *palette_mode_info, uint8_t *best_palette_color_map,
diff --git a/third_party/googletest/README.libvpx b/third_party/googletest/README.libaom
similarity index 100%
rename from third_party/googletest/README.libvpx
rename to third_party/googletest/README.libaom
diff --git a/third_party/libwebm/README.libvpx b/third_party/libwebm/README.libaom
similarity index 100%
rename from third_party/libwebm/README.libvpx
rename to third_party/libwebm/README.libaom
diff --git a/third_party/libyuv/README.libvpx b/third_party/libyuv/README.libaom
similarity index 100%
rename from third_party/libyuv/README.libvpx
rename to third_party/libyuv/README.libaom
diff --git a/third_party/x86inc/README.libvpx b/third_party/x86inc/README.libaom
similarity index 100%
rename from third_party/x86inc/README.libvpx
rename to third_party/x86inc/README.libaom