Add aom_sum_sse_2d_i16_c and associated SIMD versions

A new function, aom_sum_sse_2d_i16_c, is introduced. It computes both the
sum of the values and the sum of the squared values of a residual block.
The corresponding AVX2 and SSE2 functions have been added.

Change-Id: Ia9e92ef5d828a47e1dee112f631a9850e7452f7b
diff --git a/aom_dsp/x86/sum_squares_sse2.h b/aom_dsp/x86/sum_squares_sse2.h
index 491e31c..5ed3f2c 100644
--- a/aom_dsp/x86/sum_squares_sse2.h
+++ b/aom_dsp/x86/sum_squares_sse2.h
@@ -19,4 +19,10 @@
                                          int height);
 uint64_t aom_sum_squares_2d_i16_4x4_sse2(const int16_t *src, int stride);
 
+uint64_t aom_sum_sse_2d_i16_4x4_sse2(const int16_t *src, int stride, int *sum);
+uint64_t aom_sum_sse_2d_i16_4xn_sse2(const int16_t *src, int stride, int height,
+                                     int *sum);
+uint64_t aom_sum_sse_2d_i16_nxn_sse2(const int16_t *src, int stride, int width,
+                                     int height, int *sum);
+
 #endif  // AOM_DSP_X86_SUM_SQUARES_SSE2_H_