sse2 version of vp8_regular_quantize_b

Added sse2 version of vp8_regular_quantize_b which improved encode
performance(for the clip used) by ~10% for 32 bit builds and ~3% for
64 bit builds.

Also updated SHADOW_ARGS_TO_STACK to allow for more than 9 arguments.

Change-Id: I62f78eabc8040b39f3ffdf21be175811e96b39af
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 7840e35..a1622e6 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -199,16 +199,13 @@
         push r9
     %endif
     %if %1 > 6
-        mov rax,[rbp+16]
+      %assign i %1-6
+      %assign off 16
+      %rep i
+        mov rax,[rbp+off]
         push rax
-    %endif
-    %if %1 > 7
-        mov rax,[rbp+24]
-        push rax
-    %endif
-    %if %1 > 8
-        mov rax,[rbp+32]
-        push rax
+        %assign off off+8
+      %endrep
     %endif
   %endm
 %endif