Bitmask: init CL of building loop filter bitmask

Building bitmask for loop filter.

The following large patch is not easy to review and validate:
https://aomedia-review.googlesource.com/#/c/aom/+/37683/

I am going to break this patch into smaller CLs and validate its
functionality. Refactoring is also applied at the same time.

This is WIP. I set a macro, LOOP_FILTER_BITMASK, to guard the changes
like an experiment flag.

Change-Id: Ic19e8ddfaa280659b7f8f6139d8c69445481c6da
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 2b7d78c..c8c9e54 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -187,6 +187,24 @@
   }
 }
 
+#if LOOP_FILTER_BITMASK
+static int alloc_loop_filter(AV1_COMMON *cm) {
+  aom_free(cm->lf.lfm);  // Drop any mask array left from an earlier call.
+  // Each lfm holds bit masks for all the 4x4 blocks in a max 64x64 (128x128
+  // for ext_partitions) region. One lfm is allocated per MAX_MIB_SIZE x
+  // MAX_MIB_SIZE block of mi units: lfm_stride is the count per row (mi_cols
+  // rounded up) and lfm_num the total (mi_rows rounded up, times the stride).
+  cm->lf.lfm_stride = (cm->mi_cols + (MAX_MIB_SIZE - 1)) >> MAX_MIB_SIZE_LOG2;
+  cm->lf.lfm_num = ((cm->mi_rows + (MAX_MIB_SIZE - 1)) >> MAX_MIB_SIZE_LOG2) *
+                   cm->lf.lfm_stride;
+  cm->lf.lfm =
+      (LoopFilterMask *)aom_calloc(cm->lf.lfm_num, sizeof(*cm->lf.lfm));
+  if (!cm->lf.lfm) return 1;  // Nonzero return signals allocation failure.
+
+  return 0;
+}
+#endif  // LOOP_FILTER_BITMASK
+
 void av1_free_context_buffers(AV1_COMMON *cm) {
   const int num_planes = av1_num_planes(cm);
   int i;
@@ -213,6 +231,13 @@
     aom_free(cm->top_txfm_context[i]);
     cm->top_txfm_context[i] = NULL;
   }
+
+#if LOOP_FILTER_BITMASK
+  aom_free(cm->lf.lfm);
+  cm->lf.lfm = NULL;
+  cm->lf.lfm_num = 0;
+  cm->lf.lfm_stride = 0;
+#endif  // LOOP_FILTER_BITMASK
 }
 
 int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
@@ -281,6 +306,8 @@
     cm->above_context_alloc_cols = aligned_mi_cols;
   }
 
+  if (alloc_loop_filter(cm)) goto fail;
+
   return 0;
 
 fail:
diff --git a/av1/common/av1_loopfilter.h b/av1/common/av1_loopfilter.h
index 7770d8c..0c49d51 100644
--- a/av1/common/av1_loopfilter.h
+++ b/av1/common/av1_loopfilter.h
@@ -35,6 +35,87 @@
   LF_PATH_SLOW,
 };
 
+#if LOOP_FILTER_BITMASK
+typedef struct {
+  uint64_t bits[4];  // 256 bits: one per 4x4 block of a 64x64 Y region
+} FilterMaskY;
+
+typedef uint64_t FilterMaskUV;  // 64 bits: 4x4 blocks of 32x32 chroma (420)
+
+// This structure holds bit masks for all 4x4 blocks in a 64x64 region.
+// Each 1 bit marks a position at which the loop filter is to be applied.
+// For the Y plane, the 4x4 blocks of a 64x64 region need 16x16 = 256 bits, so
+// four uint64_t are used. For the U and V planes in 4:2:0 format the plane
+// size is 32x32 (8x8 = 64 bits), so a single uint64_t holds the bitmask.
+// left_ entries say whether to filter the border to the left of a block;
+// above_ entries say whether to filter the border above it.
+// Each transform size may use a different type of loop filter, so every mask
+// array has one entry per transform size (TX_SIZES entries).
+// The lfl_ arrays store the per-plane filter levels.
+typedef struct {
+  FilterMaskY left_y[TX_SIZES];
+  FilterMaskY above_y[TX_SIZES];
+  FilterMaskUV left_u[TX_SIZES];
+  FilterMaskUV above_u[TX_SIZES];
+  FilterMaskUV left_v[TX_SIZES];
+  FilterMaskUV above_v[TX_SIZES];
+
+  // Y plane: separate filter levels for horizontal and vertical edges
+  uint8_t lfl_y_hor[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
+  uint8_t lfl_y_ver[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
+
+  // U and V planes: vertical and horizontal edges share the same level
+  uint8_t lfl_u[MAX_MIB_SIZE / 4][MAX_MIB_SIZE / 4];
+  uint8_t lfl_v[MAX_MIB_SIZE / 4][MAX_MIB_SIZE / 4];
+} LoopFilterMaskInfo;
+// TODO(chengchen): remove old version of bitmask construction code once
+// new bitmask is complete.
+
+// Loop filter bit masks for one superblock: LOOP_FILTER_MASK_NUM mask sets.
+#define LOOP_FILTER_MASK_NUM 4
+typedef struct {
+  LoopFilterMaskInfo lfm_info[LOOP_FILTER_MASK_NUM];
+  int is_setup;  // NOTE(review): presumably nonzero once built - confirm.
+} LoopFilterMask;
+
+// To decide whether to apply loop filtering at a transform block edge, we
+// need information about the neighboring transform block. Specifically, for
+// a vertical edge we need the information of the tx block to its left; for
+// a horizontal edge we need the info of the tx block above it.
+// Thus, we need to record info of the right column and bottom row of tx
+// blocks. This information is recorded once bitmask building for a
+// superblock is finished, and is then used when building the bitmask of
+// the next superblock.
+// Information includes:
+// ------------------------------------------------------------
+//                    MAX_MIB_SIZE
+// Y  tx_size above |--------------|
+// Y  tx_size left  |--------------|
+// UV tx_size above |--------------|
+// UV tx_size left  |--------------|
+// Y level above    |--------------|
+// Y level left     |--------------|
+// U level above    |--------------|
+// U level left     |--------------|
+// V level above    |--------------|
+// V level left     |--------------|
+// skip             |--------------|
+// ------------------------------------------------------------
+typedef struct {
+  TX_SIZE tx_size_y_above[MAX_MIB_SIZE];
+  TX_SIZE tx_size_y_left[MAX_MIB_SIZE];
+  TX_SIZE tx_size_uv_above[MAX_MIB_SIZE];
+  TX_SIZE tx_size_uv_left[MAX_MIB_SIZE];
+  uint8_t y_level_above[MAX_MIB_SIZE];
+  uint8_t y_level_left[MAX_MIB_SIZE];
+  uint8_t u_level_above[MAX_MIB_SIZE];
+  uint8_t u_level_left[MAX_MIB_SIZE];
+  uint8_t v_level_above[MAX_MIB_SIZE];
+  uint8_t v_level_left[MAX_MIB_SIZE];
+  uint8_t skip[MAX_MIB_SIZE];
+} LpfSuperblockInfo;
+#endif  // LOOP_FILTER_BITMASK
+
 struct loopfilter {
   int filter_level[2];
   int filter_level_u;
@@ -54,6 +135,13 @@
   // 0 = ZERO_MV, MV
   int8_t mode_deltas[MAX_MODE_LF_DELTAS];
   int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
+
+#if LOOP_FILTER_BITMASK
+  LoopFilterMask *lfm;
+  size_t lfm_num;
+  int lfm_stride;
+  LpfSuperblockInfo neighbor_sb_lpf_info;
+#endif
 };
 
 // Need to align this structure so when it is declared and
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 61a7ccb..62b4c73 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -95,6 +95,11 @@
 
 #define FRAME_NUM_LIMIT (INT_MAX - MAX_FRAME_DISTANCE - 1)
 
+// TODO(chengchen): Temporary flag that serves as an experimental flag for the
+// WIP bitmask construction.
+// It shall be removed once the bitmask code is completely checked in.
+#define LOOP_FILTER_BITMASK 1
+
 // Bitstream profiles indicated by 2-3 bits in the uncompressed header.
 // 00: Profile 0.  8-bit 4:2:0 only.
 // 10: Profile 1.  8-bit 4:4:4, 4:2:2, and 4:4:0.