Implement row-based multithreading of temporal filter

This CL adds support for row-based multithreading of the temporal
filter module.

cpu-used    Resolution    Tile    Average Encode Time
                                     Reduction (%)
    1        832x480      2x1       0.59 (2 threads)
    2        832x480      2x1       0.82 (2 threads)
    3       1280x720      2x2       8.50 (4 threads)
    4       1920x1080     4x2      11.89 (8 threads)
    5       3840x2160     4x2      24.62 (8 threads)
    6       3840x2160     4x2      25.88 (8 threads)

Change-Id: I9ef91774d71d1a811c5ad0cc447ab5f72a135352
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index 21bfc01..ab8e1bb 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -70,6 +70,10 @@
 
 #endif  // !CONFIG_REALTIME_ONLY
 
+void av1_tf_do_filtering_mt(AV1_COMP *cpi);
+
+void av1_tf_mt_dealloc(AV1TemporalFilterSync *tf_sync);
+
 int av1_compute_num_enc_workers(AV1_COMP *cpi, int max_workers);
 
 void av1_create_workers(AV1_COMP *cpi, int num_workers);