rtc: Avoid aom_process_hadamard_lp_8x16() call in HBD path

The function aom_process_hadamard_lp_8x16() processes the Hadamard
txfm for two 8x8 blocks simultaneously and is specific to the
low bit depth path. Currently, this function is also invoked in
the HBD path, which is unnecessary. Hence this CL avoids that
call by invoking aom_process_hadamard_lp_8x16() only when the
low bit depth path is in use.

Change-Id: I62eee730feee6b2bc29dd8965bc0ad22b0b95e7c
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 1f13075..2fe803f 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1014,9 +1014,16 @@
     // For block sizes 8x16 or above, Hadamard txfm of two adjacent 8x8 blocks
     // can be done per function call. Hence the call of Hadamard txfm is
     // abstracted here for the specified cases.
-    const int is_tx_8x8_dual_applicable =
+    int is_tx_8x8_dual_applicable =
         (tx_size == TX_8X8 && block_size_wide[bsize] >= 16 &&
          block_size_high[bsize] >= 8);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+    // As of now, the dual implementation of Hadamard txfm is available only
+    // for low bitdepth and only when tx_type != IDTX.
+    if (use_hbd || tx_type == IDTX) is_tx_8x8_dual_applicable = 0;
+#endif
+
     if (is_tx_8x8_dual_applicable) {
       aom_process_hadamard_lp_8x16(x, max_blocks_high, max_blocks_wide,
                                    num_4x4_w, step, block_step);
@@ -1064,8 +1071,10 @@
             } else {
               if (tx_type == IDTX) {
                 aom_pixel_scale(src_diff, diff_stride, low_coeff, 3, 1, 1);
-              } else {
+              } else if (!is_tx_8x8_dual_applicable) {
                 aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
+              } else {
+                assert(is_tx_8x8_dual_applicable);
               }
               av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX,
                               p->quant_fp_QTX, low_qcoeff, low_dqcoeff,