support building vp8 and vp9 into a single lib

Change-Id: Ib8f8a66c9fd31e508cdc9caa662192f38433aa3d
diff --git a/build/make/Makefile b/build/make/Makefile
index 830360e..be9ee6d 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -105,14 +105,14 @@
 
 xform_obj_prefix:=.objs/$(TOOLCHAIN)/
 xform_obj_path=$(if $(1),$(addprefix $(xform_obj_prefix),$(subst /,_,$(1))),$(xform_obj_prefix))
-xform_obj_path_o_d=$(call xform_obj_path,$(1).o) $(call xform_obj_path,$(1).d)
+xform_obj_path_o_d=$(call xform_obj_path,$(1)).o $(call xform_obj_path,$(1)).d
 define obj_rules_template
 $(BUILD_PFX)$(call xform_obj_path,$(1))%.c.d: $(1)%.c
 	$(if $(quiet),@echo "    [DEP] $$@")
 	$(qexec)mkdir -p $$(dir $$@)
 	$(qexec)$(CC) $$(INTERNAL_CFLAGS) $$(CFLAGS) -M $$< | $(fmt_deps) > $$@
 
-$(BUILD_PFX)$(call xform_obj_path,$(1))%.c.o: $(1)%.c
+$(BUILD_PFX)$(call xform_obj_path,$(1))%.c.o: $(1)%.c $(BUILD_PFX)$(call xform_obj_path,$(1))%.c.d
 	$(if $(quiet),@echo "    [CC] $$@")
 	$(qexec)$(CC) $$(INTERNAL_CFLAGS) $$(CFLAGS) -c -o $$@ $$<
 
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 7d47389..35fb4df 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -143,7 +143,9 @@
             if [ "${f##*.}" == "$pat" ]; then
                 unset file_list[i]
 
+                objf=$(echo ${f%.*}.obj | sed -e 's/^[\./]\+//g' -e 's,/,_,g')
                 open_tag File RelativePath="./$f"
+
                 if [ "$pat" == "asm" ] && $asm_use_custom_step; then
                     for plat in "${platforms[@]}"; do
                         for cfg in Debug Release; do
@@ -154,13 +156,26 @@
                                 Name="VCCustomBuildTool" \
                                 Description="Assembling \$(InputFileName)" \
                                 CommandLine="$(eval echo \$asm_${cfg}_cmdline)" \
-                                Outputs="\$(InputName).obj" \
+                                Outputs="\$(IntDir)$objf" \
 
                             close_tag FileConfiguration
                         done
                     done
                 fi
+                if [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then
+                    for plat in "${platforms[@]}"; do
+                        for cfg in Debug Release; do
+                            open_tag FileConfiguration \
+                                Name="${cfg}|${plat}" \
 
+                            tag Tool \
+                                Name="VCCLCompilerTool" \
+                                ObjectFile="\$(IntDir)$objf" \
+
+                            close_tag FileConfiguration
+                        done
+                    done
+                fi
                 close_tag File
 
                 break
diff --git a/build/x86-msvs/obj_int_extract.bat b/build/x86-msvs/obj_int_extract.bat
index 90bb181..1b77dfa 100644
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -13,3 +13,11 @@
 obj_int_extract.exe rvds "asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
 obj_int_extract.exe rvds "asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
 obj_int_extract.exe rvds "asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
+
+cl /I "./" /I "%1" /nologo /c "%1/vp8/common/asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/asm_dec_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/asm_enc_offsets.c"
+obj_int_extract.exe rvds "asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
+obj_int_extract.exe rvds "asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
+obj_int_extract.exe rvds "asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
diff --git a/examples.mk b/examples.mk
index aea29eb..e799a2b 100644
--- a/examples.mk
+++ b/examples.mk
@@ -38,7 +38,7 @@
 vpxenc.SRCS                 += libmkv/EbmlWriter.h
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
-UTILS-$(CONFIG_ENCODERS)    += vp8_scalable_patterns.c
+UTILS-$(CONFIG_VP8_ENCODER)    += vp8_scalable_patterns.c
 vp8_scalable_patterns.GUID   = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
 vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
 
@@ -56,37 +56,37 @@
 #example_xma.GUID             = A955FC4A-73F1-44F7-135E-30D84D32F022
 #example_xma.DESCRIPTION      = External Memory Allocation mode usage
 
-GEN_EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c
 simple_decoder.GUID              = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
 simple_decoder.DESCRIPTION       = Simplified decoder loop
-GEN_EXAMPLES-$(CONFIG_DECODERS) += postproc.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += postproc.c
 postproc.GUID                    = 65E33355-F35E-4088-884D-3FD4905881D7
 postproc.DESCRIPTION             = Decoder postprocessor control
-GEN_EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c
+GEN_EXAMPLES-$(CONFIG_VP8_DECODER) += decode_to_md5.c
 decode_to_md5.SRCS              += md5_utils.h md5_utils.c
 decode_to_md5.GUID               = 59120B9B-2735-4BFE-B022-146CA340FE42
 decode_to_md5.DESCRIPTION        = Frame by frame MD5 checksum
 
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += simple_encoder.c
 simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
 simple_encoder.DESCRIPTION       = Simplified encoder loop
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += twopass_encoder.c
 twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
 twopass_encoder.DESCRIPTION      = Two-pass encoder loop
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += force_keyframe.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += force_keyframe.c
 force_keyframe.GUID              = 3C67CADF-029F-4C86-81F5-D6D4F51177F0
 force_keyframe.DESCRIPTION       = Force generation of keyframes
 ifeq ($(CONFIG_DECODERS),yes)
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_with_drops.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c
 endif
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
-ifeq ($(CONFIG_DECODERS),yes)
+ifeq ($(CONFIG_VP8_DECODER),yes)
 GEN_EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c
 endif
 decode_with_partial_drops.GUID           = 61C2D026-5754-46AC-916F-1343ECC5537E
 decode_with_partial_drops.DESCRIPTION    = Drops parts of frames while decoding
-GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c
+GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += error_resilient.c
 error_resilient.GUID             = DF5837B9-4145-4F92-A031-44E4F832E00C
 error_resilient.DESCRIPTION      = Error Resiliency Feature
 
diff --git a/libs.mk b/libs.mk
index 792e305..64a636d 100644
--- a/libs.mk
+++ b/libs.mk
@@ -44,6 +44,19 @@
 endif # rvct
 endif # !gcc
 
+#
+# Rule to generate runtime cpu detection files
+#
+define rtcd_h_template
+$$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
+	@echo "    [CREATE] $$@"
+	$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \
+          --sym=$(1) \
+          --config=$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+          $$(RTCD_OPTIONS) $$^ > $$@
+CLEAN-OBJS += $$(BUILD_PFX)$(1).h
+RTCD += $$(BUILD_PFX)$(1).h
+endef
 
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk
@@ -190,7 +203,8 @@
 INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a
 endif
 
-CODEC_SRCS=$(filter-out %_test.cc,$(call enabled,CODEC_SRCS))
+CODEC_SRCS=$(filter-out %_offsets.c,\
+           $(filter-out %_test.cc,$(call enabled,CODEC_SRCS)))
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(CODEC_SRCS)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
 
@@ -245,7 +259,7 @@
 PROJECTS-$(BUILD_LIBVPX) += vpx.vcproj
 
 vpx.vcproj: vpx_config.asm
-vpx.vcproj: vpx_rtcd.h
+vpx.vcproj: $(RTCD)
 
 endif
 else
@@ -362,18 +376,6 @@
 $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
 CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
 
-#
-# Rule to generate runtime cpu detection files
-#
-$(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
-	@echo "    [CREATE] $@"
-	$(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \
-          --sym=vpx_rtcd \
-          --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
-          $(RTCD_OPTIONS) $^ > $@
-CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
-
-
 CODEC_DOC_SRCS += vpx/vpx_codec.h \
                   vpx/vpx_decoder.h \
                   vpx/vpx_encoder.h \
@@ -501,8 +503,8 @@
 	@echo "INCLUDE_PATH += ." >> $@;
 	@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@
 
-## Generate vpx_rtcd.h for all objects
-$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h
+## Generate rtcd.h for all objects
+$(OBJS-yes:.o=.d): $(RTCD)
 
 ## Update the global src list
 SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index d572ee6..d857280 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -17,7 +17,7 @@
 extern "C" {
 #include "vp9/common/entropy.h"
 #include "vp9/common/idct.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 }
 
 #include "acm_random.h"
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index e2eb28e..3fe4774 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -16,7 +16,7 @@
 
 extern "C" {
 #include "vp9/common/idct.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 }
 
 #include "acm_random.h"
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index fc7084e..729344d 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -16,7 +16,7 @@
 
 extern "C" {
 #include "vp9/common/idct.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 }
 
 #include "acm_random.h"
diff --git a/test/idct8x8_test.cc b/test/idct8x8_test.cc
index 0155716..099efbf 100644
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -16,7 +16,7 @@
 
 extern "C" {
 #include "vp9/common/idct.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 }
 
 #include "acm_random.h"
diff --git a/test/idctllm_test.cc b/test/idctllm_test.cc
index dd42e22..3071a2a 100644
--- a/test/idctllm_test.cc
+++ b/test/idctllm_test.cc
@@ -11,7 +11,7 @@
 
 extern "C" {
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 }
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
index d2e0d61..692b67b 100644
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -14,7 +14,7 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 extern "C" {
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 }
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index af2f3bd..36d2e63 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -10,7 +10,7 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 extern "C" {
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 }
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 2b562e6..8cd5286 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -15,7 +15,7 @@
 
 extern "C" {
 #include "./vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 }
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 06f14a1..22d5a84 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -16,7 +16,7 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 extern "C" {
 #include "./vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 }
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 99363de..f1c50d3 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -12,7 +12,7 @@
 #include "test/acm_random.h"
 extern "C" {
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vp8/encoder/block.h"
 #include "vpx_mem/vpx_mem.h"
diff --git a/test/variance_test.cc b/test/variance_test.cc
index f838c05..2012b18 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -16,7 +16,7 @@
 extern "C" {
 #include "vp9/encoder/variance.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 }
 
 namespace {
diff --git a/test/vp8_fdct4x4_test.cc b/test/vp8_fdct4x4_test.cc
index 619b23d..3c60011 100644
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -18,7 +18,7 @@
 
 
 extern "C" {
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 }
 
 #include "test/acm_random.h"
diff --git a/tools/all_builds.py b/tools/all_builds.py
index 78581d9..d1f0c80 100755
--- a/tools/all_builds.py
+++ b/tools/all_builds.py
@@ -5,7 +5,7 @@
 import sys
 
 LONG_OPTIONS = ["shard=", "shards="]
-BASE_COMMAND = "./configure --disable-vp8 --disable-unit-tests --enable-internal-stats --enable-experimental"
+BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
 
 def RunCommand(command):
   run = subprocess.Popen(command, shell=True)
diff --git a/vp8/common/arm/armv6/idct_blk_v6.c b/vp8/common/arm/armv6/idct_blk_v6.c
index 6002c0f..c94f84a 100644
--- a/vp8/common/arm/armv6/idct_blk_v6.c
+++ b/vp8/common/arm/armv6/idct_blk_v6.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 
 void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index c63073c..799c8bd 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include <math.h>
 #include "vp8/common/filter.h"
 #include "bilinearfilter_arm.h"
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 148951a..7fe3967 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include <math.h>
 #include "vp8/common/filter.h"
 #include "vpx_ports/mem.h"
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index b8f9bd9..3bdc967 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/loopfilter.h"
 #include "vp8/common/onyxc_int.h"
 
diff --git a/vp8/common/arm/neon/idct_blk_neon.c b/vp8/common/arm/neon/idct_blk_neon.c
index ee7f223..fb327a7 100644
--- a/vp8/common/arm/neon/idct_blk_neon.c
+++ b/vp8/common/arm/neon/idct_blk_neon.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 /* place these declarations here because we don't want to maintain them
  * outside of this scope
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 121e090..2874896 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
diff --git a/vp8/common/arm/variance_arm.c b/vp8/common/arm/variance_arm.c
index 891d767..467a509 100644
--- a/vp8/common/arm/variance_arm.c
+++ b/vp8/common/arm/variance_arm.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/variance.h"
 #include "vp8/common/filter.h"
 
diff --git a/vp8/common/dequantize.c b/vp8/common/dequantize.c
index 8eda486..6e2f69a 100644
--- a/vp8/common/dequantize.c
+++ b/vp8/common/dequantize.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 5a6ac7b..2de019d 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #if ARCH_ARM
 #include "vpx_ports/arm.h"
 #elif ARCH_X86 || ARCH_X86_64
diff --git a/vp8/common/idct_blk.c b/vp8/common/idct_blk.c
index 0b058c7..8edfffb 100644
--- a/vp8/common/idct_blk.c
+++ b/vp8/common/idct_blk.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 void vp8_dequant_idct_add_c(short *input, short *dq,
                             unsigned char *dest, int stride);
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index d048665..9262640 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -13,7 +13,7 @@
 #define __INC_INVTRANS_H
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "blockd.h"
 #include "onyxc_int.h"
 
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 41b4f12..2b1ee85 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "loopfilter.h"
 #include "onyxc_int.h"
 #include "vpx_mem/vpx_mem.h"
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index b3af2d6..1e47f34 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -14,7 +14,7 @@
 
 #include "vpx_ports/mem.h"
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 #define MAX_LOOP_FILTER             63
 /* fraction of total macroblock rows to be used in fast filter level picking */
diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c
index 3dff150..8a8f92f 100644
--- a/vp8/common/mfqe.c
+++ b/vp8/common/mfqe.c
@@ -20,7 +20,7 @@
 #include "postproc.h"
 #include "variance.h"
 #include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_scale/yv12config.h"
 
 #include <limits.h>
diff --git a/vp8/common/mips/dspr2/dequantize_dspr2.c b/vp8/common/mips/dspr2/dequantize_dspr2.c
index 6823325..619ee80 100644
--- a/vp8/common/mips/dspr2/dequantize_dspr2.c
+++ b/vp8/common/mips/dspr2/dequantize_dspr2.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
 
 #if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/filter_dspr2.c b/vp8/common/mips/dspr2/filter_dspr2.c
index 71fdcd7..ace5d40 100644
--- a/vp8/common/mips/dspr2/filter_dspr2.c
+++ b/vp8/common/mips/dspr2/filter_dspr2.c
@@ -10,7 +10,7 @@
 
 
 #include <stdlib.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_ports/mem.h"
 
 #if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/idct_blk_dspr2.c b/vp8/common/mips/dspr2/idct_blk_dspr2.c
index 1e0ebd1..ab938cd 100644
--- a/vp8/common/mips/dspr2/idct_blk_dspr2.c
+++ b/vp8/common/mips/dspr2/idct_blk_dspr2.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 #if HAVE_DSPR2
 
diff --git a/vp8/common/mips/dspr2/idctllm_dspr2.c b/vp8/common/mips/dspr2/idctllm_dspr2.c
index 25b7936..2eff710 100644
--- a/vp8/common/mips/dspr2/idctllm_dspr2.c
+++ b/vp8/common/mips/dspr2/idctllm_dspr2.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 #if HAVE_DSPR2
 #define CROP_WIDTH 256
diff --git a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
index b8e5e4d..9ae6bc8 100644
--- a/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
+++ b/vp8/common/mips/dspr2/loopfilter_filters_dspr2.c
@@ -10,7 +10,7 @@
 
 
 #include <stdlib.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 
 #if HAVE_DSPR2
diff --git a/vp8/common/mips/dspr2/reconinter_dspr2.c b/vp8/common/mips/dspr2/reconinter_dspr2.c
index a5239a3..a14b397 100644
--- a/vp8/common/mips/dspr2/reconinter_dspr2.c
+++ b/vp8/common/mips/dspr2/reconinter_dspr2.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 
 #if HAVE_DSPR2
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 5325bac..03c9718 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -13,7 +13,7 @@
 #define __INC_VP8C_INT_H
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "loopfilter.h"
 #include "entropymv.h"
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 80fa530..c6442c9 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -10,7 +10,8 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "vpx_scale_rtcd.h"
 #include "vpx_scale/yv12config.h"
 #include "postproc.h"
 #include "common.h"
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 3da3bc7..43f84d0 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -11,7 +11,7 @@
 
 #include <limits.h>
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "blockd.h"
 #include "reconinter.h"
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index 4067a68..a851215 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "blockd.h"
 
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 7bb8d0a..3d4f2c4 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -10,17 +10,17 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "blockd.h"
 
 void vp8_intra4x4_predict_c(unsigned char *Above,
                             unsigned char *yleft, int left_stride,
-                            B_PREDICTION_MODE b_mode,
+                            int           _b_mode,
                             unsigned char *dst, int dst_stride,
                             unsigned char top_left)
 {
     int i, r, c;
-
+    B_PREDICTION_MODE b_mode = (B_PREDICTION_MODE)_b_mode;
     unsigned char Left[4];
     Left[0] = yleft[0];
     Left[1] = yleft[left_stride];
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
index 01dad46..0b371b0 100644
--- a/vp8/common/rtcd.c
+++ b/vp8/common/rtcd.c
@@ -9,97 +9,13 @@
  */
 #include "vpx_config.h"
 #define RTCD_C
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "vpx_ports/vpx_once.h"
 
-#if CONFIG_MULTITHREAD && defined(_WIN32)
-#include <windows.h>
-#include <stdlib.h>
-static void once(void (*func)(void))
+extern void vpx_scale_rtcd(void);
+
+void vp8_rtcd()
 {
-    static CRITICAL_SECTION *lock;
-    static LONG waiters;
-    static int done;
-    void *lock_ptr = &lock;
-
-    /* If the initialization is complete, return early. This isn't just an
-     * optimization, it prevents races on the destruction of the global
-     * lock.
-     */
-    if(done)
-        return;
-
-    InterlockedIncrement(&waiters);
-
-    /* Get a lock. We create one and try to make it the one-true-lock,
-     * throwing it away if we lost the race.
-     */
-
-    {
-        /* Scope to protect access to new_lock */
-        CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
-        InitializeCriticalSection(new_lock);
-        if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
-        {
-            DeleteCriticalSection(new_lock);
-            free(new_lock);
-        }
-    }
-
-    /* At this point, we have a lock that can be synchronized on. We don't
-     * care which thread actually performed the allocation.
-     */
-
-    EnterCriticalSection(lock);
-
-    if (!done)
-    {
-        func();
-        done = 1;
-    }
-
-    LeaveCriticalSection(lock);
-
-    /* Last one out should free resources. The destructed objects are
-     * protected by checking if(done) above.
-     */
-    if(!InterlockedDecrement(&waiters))
-    {
-        DeleteCriticalSection(lock);
-        free(lock);
-        lock = NULL;
-    }
-}
-
-
-#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
-#include <pthread.h>
-static void once(void (*func)(void))
-{
-    static pthread_once_t lock = PTHREAD_ONCE_INIT;
-    pthread_once(&lock, func);
-}
-
-
-#else
-/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
- * so as long as your platform provides atomic loads/stores of pointers
- * no synchronization is strictly necessary.
- */
-
-static void once(void (*func)(void))
-{
-    static int done;
-
-    if(!done)
-    {
-        func();
-        done = 1;
-    }
-}
-#endif
-
-
-void vpx_rtcd()
-{
+    vpx_scale_rtcd();
     once(setup_rtcd_internal);
 }
diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index 0f950f8..4eb96b7 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -1,6 +1,8 @@
-common_forward_decls() {
+vp8_common_forward_decls() {
 cat <<EOF
-#include "vp8/common/blockd.h"
+/*
+ * VP8
+ */
 
 struct blockd;
 struct macroblockd;
@@ -14,7 +16,7 @@
 struct yv12_buffer_config;
 EOF
 }
-forward_decls common_forward_decls
+forward_decls vp8_common_forward_decls
 
 #
 # Dequant
@@ -146,7 +148,7 @@
 prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
 specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
 
-prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
+prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
 specialize vp8_intra4x4_predict media
 vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6
 
@@ -530,39 +532,3 @@
 
 # End of encoder only functions
 fi
-
-# Scaler functions
-if [ "CONFIG_SPATIAL_RESAMPLING" != "yes" ]; then
-    prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
-    prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-    prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
-fi
-
-prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
-specialize vp8_yv12_extend_frame_borders neon
-
-prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_frame neon
-
-prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
-specialize vp8_yv12_copy_y neon
-
diff --git a/vp8/common/x86/idct_blk_mmx.c b/vp8/common/x86/idct_blk_mmx.c
index 4adf3f5..49b2013 100644
--- a/vp8/common/x86/idct_blk_mmx.c
+++ b/vp8/common/x86/idct_blk_mmx.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 
 extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
diff --git a/vp8/common/x86/idct_blk_sse2.c b/vp8/common/x86/idct_blk_sse2.c
index 056e052..ae96ec8 100644
--- a/vp8/common/x86/idct_blk_sse2.c
+++ b/vp8/common/x86/idct_blk_sse2.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 void vp8_idct_dequant_0_2x_sse2
             (short *q, short *dq ,
diff --git a/vp8/common/x86/recon_wrapper_sse2.c b/vp8/common/x86/recon_wrapper_sse2.c
index b482faa..65f4251 100644
--- a/vp8/common/x86/recon_wrapper_sse2.c
+++ b/vp8/common/x86/recon_wrapper_sse2.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vp8/common/blockd.h"
 
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 3437a23..c0416b7 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_ports/mem.h"
 #include "filter_x86.h"
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index a4a00f6..a22f372 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -10,7 +10,8 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "onyxd_int.h"
 #include "vp8/common/header.h"
 #include "vp8/common/reconintra4x4.h"
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 8d6871b..459e34e 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -25,6 +25,7 @@
 #include <assert.h>
 
 #include "vp8/common/quant_common.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx_scale/vpxscale.h"
 #include "vp8/common/systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index e52a707..a714856 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
 # include <unistd.h>
 #endif
diff --git a/vp8/encoder/arm/dct_arm.c b/vp8/encoder/arm/dct_arm.c
index af0fb27..f71300d 100644
--- a/vp8/encoder/arm/dct_arm.c
+++ b/vp8/encoder/arm/dct_arm.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 #if HAVE_MEDIA
 
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 8999e34..80d9ad0 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/encoder/block.h"
 #include <math.h>
 #include "vpx_mem/vpx_mem.h"
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index c0dd7c1..98526d6 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -13,7 +13,7 @@
 #include "vp8/common/reconinter.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
 /* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 340dd63..cfa4cb9 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "quantize.h"
 #include "vp8/common/reconintra4x4.h"
 #include "encodemb.h"
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 7d494f2..7ed2fe1 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "encodemb.h"
 #include "vp8/common/reconinter.h"
 #include "quantize.h"
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index b668c8f..e5306a4 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
 #include <limits.h>
 #include <stdio.h>
 
+#include "./vpx_scale_rtcd.h"
 #include "block.h"
 #include "onyx_int.h"
 #include "vp8/common/variance.h"
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 4235662..e7d2270 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -10,6 +10,7 @@
 
 
 #include "vpx_config.h"
+#include "./vpx_scale_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 #include "vp8/common/blockd.h"
 #include "onyx_int.h"
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 4121349..8114ec3 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -9,6 +9,7 @@
  */
 
 
+#include "./vpx_scale_rtcd.h"
 #include "vp8/common/onyxc_int.h"
 #include "onyx_int.h"
 #include "quantize.h"
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 28d5c1e..946d22a 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -14,7 +14,7 @@
 #include <limits.h>
 #include <assert.h>
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/pragmas.h"
 #include "tokenize.h"
 #include "treewriter.h"
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index c1ac6c1..87cccaa 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -12,7 +12,7 @@
 #include "vp8/common/reconinter.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 
 #include <emmintrin.h>
 
diff --git a/vp8/encoder/x86/vp8_enc_stubs_mmx.c b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
index da25f52..cf3d8ca 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_mmx.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_mmx.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_ports/x86.h"
 #include "vp8/encoder/block.h"
 
diff --git a/vp8/encoder/x86/vp8_enc_stubs_sse2.c b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
index 68db815..3dfbee3 100644
--- a/vp8/encoder/x86/vp8_enc_stubs_sse2.c
+++ b/vp8/encoder/x86/vp8_enc_stubs_sse2.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_config.h"
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx_ports/x86.h"
 #include "vp8/encoder/block.h"
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index d54c233..2f73420 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -194,3 +194,5 @@
 
 $(eval $(call asm_offsets_template,\
          vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/asm_com_offsets.c))
+
+$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index eeac3a8..83eecba 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -9,7 +9,7 @@
  */
 
 
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
@@ -582,7 +582,7 @@
 
     struct VP8_COMP *optr;
 
-    vpx_rtcd();
+    vp8_rtcd();
 
     if (!ctx->priv)
     {
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index c13d697..01482fc 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -11,7 +11,7 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "vpx_rtcd.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vp8dx.h"
 #include "vpx/internal/vpx_codec_internal.h"
@@ -194,7 +194,7 @@
     vpx_codec_err_t        res = VPX_CODEC_OK;
     (void) data;
 
-    vpx_rtcd();
+    vp8_rtcd();
 
     /* This function only allocates space for the vpx_codec_alg_priv_t
      * structure. More memory may be required at the time the stream
diff --git a/vp9/common/filter.c b/vp9/common/filter.c
index 4667b46..429f408 100644
--- a/vp9/common/filter.c
+++ b/vp9/common/filter.c
@@ -12,7 +12,7 @@
 #include <stdlib.h>
 #include "filter.h"
 #include "vpx_ports/mem.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 DECLARE_ALIGNED(16, const short, vp9_bilinear_filters[SUBPEL_SHIFTS][2]) = {
   { 128,   0 },
diff --git a/vp9/common/generic/systemdependent.c b/vp9/common/generic/systemdependent.c
index a3d6cb4..749e3d3 100644
--- a/vp9/common/generic/systemdependent.c
+++ b/vp9/common/generic/systemdependent.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_ports/config.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "vp9/common/subpixel.h"
 #include "vp9/common/loopfilter.h"
 #include "vp9/common/idct.h"
@@ -84,5 +84,5 @@
   vp9_arch_arm_common_init(ctx);
 #endif
 
-  vpx_rtcd();
+  vp9_rtcd();
 }
diff --git a/vp9/common/onyxc_int.h b/vp9/common/onyxc_int.h
index 516bea3..62d7978 100644
--- a/vp9/common/onyxc_int.h
+++ b/vp9/common/onyxc_int.h
@@ -14,7 +14,7 @@
 
 #include "vpx_config.h"
 #include "vpx/internal/vpx_codec_internal.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "loopfilter.h"
 #include "entropymv.h"
 #include "entropy.h"
diff --git a/vp9/common/recon.c b/vp9/common/recon.c
index 4aee6c1..7703566 100644
--- a/vp9/common/recon.c
+++ b/vp9/common/recon.c
@@ -10,7 +10,7 @@
 
 
 #include "vpx_ports/config.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "blockd.h"
 
 void vp9_recon_b_c
diff --git a/vp9/common/reconintra.c b/vp9/common/reconintra.c
index 60de8eb..98de2b4 100644
--- a/vp9/common/reconintra.c
+++ b/vp9/common/reconintra.c
@@ -10,7 +10,7 @@
 
 #include <stdio.h>
 #include "vpx_ports/config.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "reconintra.h"
 #include "vpx_mem/vpx_mem.h"
 
diff --git a/vp9/common/reconintra4x4.c b/vp9/common/reconintra4x4.c
index 7af613d..1cf960c 100644
--- a/vp9/common/reconintra4x4.c
+++ b/vp9/common/reconintra4x4.c
@@ -12,7 +12,7 @@
 #include "vpx_ports/config.h"
 #include "vpx_mem/vpx_mem.h"
 #include "reconintra.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 #if CONFIG_NEWBINTRAMODES
 static int find_grad_measure(unsigned char *x, int stride, int n, int t,
diff --git a/vp9/common/rtcd.c b/vp9/common/rtcd.c
index 01dad46..277d5b2 100644
--- a/vp9/common/rtcd.c
+++ b/vp9/common/rtcd.c
@@ -9,97 +9,13 @@
  */
 #include "vpx_config.h"
 #define RTCD_C
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
+#include "vpx_ports/vpx_once.h"
 
-#if CONFIG_MULTITHREAD && defined(_WIN32)
-#include <windows.h>
-#include <stdlib.h>
-static void once(void (*func)(void))
+extern void vpx_scale_rtcd(void);
+
+void vp9_rtcd()
 {
-    static CRITICAL_SECTION *lock;
-    static LONG waiters;
-    static int done;
-    void *lock_ptr = &lock;
-
-    /* If the initialization is complete, return early. This isn't just an
-     * optimization, it prevents races on the destruction of the global
-     * lock.
-     */
-    if(done)
-        return;
-
-    InterlockedIncrement(&waiters);
-
-    /* Get a lock. We create one and try to make it the one-true-lock,
-     * throwing it away if we lost the race.
-     */
-
-    {
-        /* Scope to protect access to new_lock */
-        CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
-        InitializeCriticalSection(new_lock);
-        if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
-        {
-            DeleteCriticalSection(new_lock);
-            free(new_lock);
-        }
-    }
-
-    /* At this point, we have a lock that can be synchronized on. We don't
-     * care which thread actually performed the allocation.
-     */
-
-    EnterCriticalSection(lock);
-
-    if (!done)
-    {
-        func();
-        done = 1;
-    }
-
-    LeaveCriticalSection(lock);
-
-    /* Last one out should free resources. The destructed objects are
-     * protected by checking if(done) above.
-     */
-    if(!InterlockedDecrement(&waiters))
-    {
-        DeleteCriticalSection(lock);
-        free(lock);
-        lock = NULL;
-    }
-}
-
-
-#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
-#include <pthread.h>
-static void once(void (*func)(void))
-{
-    static pthread_once_t lock = PTHREAD_ONCE_INIT;
-    pthread_once(&lock, func);
-}
-
-
-#else
-/* No-op version that performs no synchronization. vpx_rtcd() is idempotent,
- * so as long as your platform provides atomic loads/stores of pointers
- * no synchronization is strictly necessary.
- */
-
-static void once(void (*func)(void))
-{
-    static int done;
-
-    if(!done)
-    {
-        func();
-        done = 1;
-    }
-}
-#endif
-
-
-void vpx_rtcd()
-{
+    vpx_scale_rtcd();
     once(setup_rtcd_internal);
 }
diff --git a/vp9/common/rtcd_defs.sh b/vp9/common/rtcd_defs.sh
index 14e9d7a..2c94c9e 100644
--- a/vp9/common/rtcd_defs.sh
+++ b/vp9/common/rtcd_defs.sh
@@ -1,5 +1,8 @@
-common_forward_decls() {
+vp9_common_forward_decls() {
 cat <<EOF
+/*
+ * VP9
+ */
 
 struct loop_filter_info;
 struct blockd;
@@ -12,14 +15,11 @@
 struct variance_vtable;
 
 #define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
-
-/* Encoder forward decls */
-struct variance_vtable;
 union int_mv;
 struct yv12_buffer_config;
 EOF
 }
-forward_decls common_forward_decls
+forward_decls vp9_common_forward_decls
 
 prototype void vp9_filter_block2d_4x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
 prototype void vp9_filter_block2d_8x4_8 "const unsigned char *src_ptr, const unsigned int src_stride, const short *HFilter_aligned16, const short *VFilter_aligned16, unsigned char *dst_ptr, unsigned int dst_stride"
@@ -94,9 +94,6 @@
 prototype void vp9_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
 specialize vp9_copy_mem8x4 mmx
 
-prototype void vp9_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"
-specialize vp9_intra4x4_predict
-
 prototype void vp9_avg_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"
 specialize vp9_avg_mem16x16
 
diff --git a/vp9/common/x86/filter_sse2.c b/vp9/common/x86/filter_sse2.c
index 400b3b4..92d0a2e 100644
--- a/vp9/common/x86/filter_sse2.c
+++ b/vp9/common/x86/filter_sse2.c
@@ -12,7 +12,7 @@
 #include <emmintrin.h> // SSE2
 #include "vp9/common/filter.h"
 #include "vpx_ports/mem.h" // for DECLARE_ALIGNED
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 // TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
 //           just a quick partial snapshot so that other can already use some
diff --git a/vp9/common/x86/filter_sse4.c b/vp9/common/x86/filter_sse4.c
index ac2c92c..9efe9f4 100644
--- a/vp9/common/x86/filter_sse4.c
+++ b/vp9/common/x86/filter_sse4.c
@@ -12,7 +12,7 @@
 #include <smmintrin.h> // SSE4.1
 #include "vp9/common/filter.h"
 #include "vpx_ports/mem.h" // for DECLARE_ALIGNED
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 // TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is
 //           just a quick partial snapshot so that other can already use some
diff --git a/vp9/common/x86/sadmxn_x86.c b/vp9/common/x86/sadmxn_x86.c
index b12680d..1ba2d48 100644
--- a/vp9/common/x86/sadmxn_x86.c
+++ b/vp9/common/x86/sadmxn_x86.c
@@ -10,7 +10,7 @@
 
 #include <emmintrin.h>  // SSE2
 #include "./vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp9_rtcd.h"
 
 
 #if HAVE_SSE2
diff --git a/vp9/decoder/decodframe.c b/vp9/decoder/decodframe.c
index 5b56b15..03a5d3e 100644
--- a/vp9/decoder/decodframe.c
+++ b/vp9/decoder/decodframe.c
@@ -32,7 +32,7 @@
 
 #include "vp9/common/seg_common.h"
 #include "vp9/common/entropy.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/vp9/decoder/dequantize.c b/vp9/decoder/dequantize.c
index e564262..541293e 100644
--- a/vp9/decoder/dequantize.c
+++ b/vp9/decoder/dequantize.c
@@ -9,7 +9,7 @@
  */
 
 
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "dequantize.h"
 #include "vp9/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
diff --git a/vp9/decoder/idct_blk.c b/vp9/decoder/idct_blk.c
index cbf96e0..0b440b4 100644
--- a/vp9/decoder/idct_blk.c
+++ b/vp9/decoder/idct_blk.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "vp9/common/idct.h"
 
 void vp9_dequant_dc_idct_add_y_block_c(short *q, short *dq,
diff --git a/vp9/encoder/arm/dct_arm.c b/vp9/encoder/arm/dct_arm.c
index 3fd04f3..5e20a47 100644
--- a/vp9/encoder/arm/dct_arm.c
+++ b/vp9/encoder/arm/dct_arm.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_config.h"
-#include "./vpx_rtcd.h"
+#include "./vp9_rtcd.h"
 
 #if HAVE_ARMV6
 
diff --git a/vp9/encoder/encodeframe.c b/vp9/encoder/encodeframe.c
index 4a739d4..b00ea3e 100644
--- a/vp9/encoder/encodeframe.c
+++ b/vp9/encoder/encodeframe.c
@@ -29,7 +29,7 @@
 #include "vp9/common/reconintra.h"
 #include "vp9/common/seg_common.h"
 #include "vp9/encoder/tokenize.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include <stdio.h>
 #include <math.h>
 #include <limits.h>
diff --git a/vp9/encoder/encodeintra.c b/vp9/encoder/encodeintra.c
index 365784c..a6bc1c3 100644
--- a/vp9/encoder/encodeintra.c
+++ b/vp9/encoder/encodeintra.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "vp9/common/idct.h"
 #include "quantize.h"
 #include "vp9/common/reconintra.h"
diff --git a/vp9/encoder/encodemb.c b/vp9/encoder/encodemb.c
index 71e81ed..6774a88 100644
--- a/vp9/encoder/encodemb.c
+++ b/vp9/encoder/encodemb.c
@@ -18,7 +18,7 @@
 #include "vpx_mem/vpx_mem.h"
 #include "rdopt.h"
 #include "vp9/common/systemdependent.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x) (x)
diff --git a/vp9/encoder/rdopt.c b/vp9/encoder/rdopt.c
index c99076e..59cff85 100644
--- a/vp9/encoder/rdopt.c
+++ b/vp9/encoder/rdopt.c
@@ -40,7 +40,7 @@
 #include "vp9/common/seg_common.h"
 #include "vp9/common/pred_common.h"
 #include "vp9/common/entropy.h"
-#include "vpx_rtcd.h"
+#include "vp9_rtcd.h"
 #include "vp9/common/mvref_common.h"
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp9/encoder/satd_c.c b/vp9/encoder/satd_c.c
index 102aa73..63944f0 100644
--- a/vp9/encoder/satd_c.c
+++ b/vp9/encoder/satd_c.c
@@ -10,7 +10,7 @@
 
 #include <stdlib.h>
 #include "vpx_ports/mem.h"
-#include "./vpx_rtcd.h"
+#include "./vp9_rtcd.h"
 unsigned int vp9_satd16x16_c(const unsigned char *src_ptr,
                              int  src_stride,
                              const unsigned char *ref_ptr,
diff --git a/vp9/exports_dec b/vp9/exports_dec
index 100ac5c..0a61fde 100644
--- a/vp9/exports_dec
+++ b/vp9/exports_dec
@@ -1,2 +1,2 @@
-data vpx_codec_vp8_dx_algo
-text vpx_codec_vp8_dx
+data vpx_codec_vp9_dx_algo
+text vpx_codec_vp9_dx
diff --git a/vp9/exports_enc b/vp9/exports_enc
index 5ae2098..25156e8 100644
--- a/vp9/exports_enc
+++ b/vp9/exports_enc
@@ -1,4 +1,4 @@
-data vpx_codec_vp8_cx_algo
-text vpx_codec_vp8_cx
-data vpx_codec_vp8x_cx_algo
-text vpx_codec_vp8x_cx
+data vpx_codec_vp9_cx_algo
+text vpx_codec_vp9_cx
+data vpx_codec_vp9x_cx_algo
+text vpx_codec_vp9x_cx
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index f24313e..d6c9b2b 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -182,3 +182,5 @@
 
 $(eval $(call asm_offsets_template,\
          vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/asm_com_offsets.c))
+
+$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/rtcd_defs.sh))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index a10d7cd..f155319 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1083,8 +1083,8 @@
 #ifndef VERSION_STRING
 #define VERSION_STRING
 #endif
-CODEC_INTERFACE(vpx_codec_vp8_cx) = {
-  "WebM Project VP8 Encoder" VERSION_STRING,
+CODEC_INTERFACE(vpx_codec_vp9_cx) = {
+  "WebM Project VP9 Encoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
   VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR |
   VPX_CODEC_CAP_OUTPUT_PARTITION,
@@ -1113,7 +1113,7 @@
 
 #if CONFIG_EXPERIMENTAL
 
-CODEC_INTERFACE(vpx_codec_vp8x_cx) = {
+CODEC_INTERFACE(vpx_codec_vp9x_cx) = {
   "VP8 Experimental Encoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
   VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 71d140c..c85b423 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -659,8 +659,8 @@
 #ifndef VERSION_STRING
 #define VERSION_STRING
 #endif
-CODEC_INTERFACE(vpx_codec_vp8_dx) = {
-  "WebM Project VP8 Decoder" VERSION_STRING,
+CODEC_INTERFACE(vpx_codec_vp9_dx) = {
+  "WebM Project VP9 Decoder" VERSION_STRING,
   VPX_CODEC_INTERNAL_ABI_VERSION,
   VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
   /* vpx_codec_caps_t          caps; */
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 50fc7f0..90b7169 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -34,8 +34,10 @@
 extern vpx_codec_iface_t *vpx_codec_vp8_cx(void);
 
 /* TODO(jkoleszar): These move to VP9 in a later patch set. */
-extern vpx_codec_iface_t  vpx_codec_vp8x_cx_algo;
-extern vpx_codec_iface_t *vpx_codec_vp8x_cx(void);
+extern vpx_codec_iface_t  vpx_codec_vp9_cx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
+extern vpx_codec_iface_t  vpx_codec_vp9x_cx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9x_cx(void);
 
 /*!@} - end algorithm interface member group*/
 
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index a2c3a39..e2ec8b2 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -32,6 +32,10 @@
  */
 extern vpx_codec_iface_t  vpx_codec_vp8_dx_algo;
 extern vpx_codec_iface_t *vpx_codec_vp8_dx(void);
+
+/* TODO(jkoleszar): These move to VP9 in a later patch set. */
+extern vpx_codec_iface_t  vpx_codec_vp9_dx_algo;
+extern vpx_codec_iface_t *vpx_codec_vp9_dx(void);
 /*!@} - end algorithm interface member group*/
 
 /* Include controls common to both the encoder and decoder */
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
new file mode 100644
index 0000000..16a735c
--- /dev/null
+++ b/vpx_ports/vpx_once.h
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+
+#if CONFIG_MULTITHREAD && defined(_WIN32)
+#include <windows.h>
+#include <stdlib.h>
+static void once(void (*func)(void))
+{
+    static CRITICAL_SECTION *lock;
+    static LONG waiters;
+    static int done;
+    void *lock_ptr = &lock;
+
+    /* If the initialization is complete, return early. This isn't just an
+     * optimization, it prevents races on the destruction of the global
+     * lock.
+     */
+    if(done)
+        return;
+
+    InterlockedIncrement(&waiters);
+
+    /* Get a lock. We create one and try to make it the one-true-lock,
+     * throwing it away if we lost the race.
+     */
+
+    {
+        /* Scope to protect access to new_lock */
+        CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
+        InitializeCriticalSection(new_lock);
+        if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
+        {
+            DeleteCriticalSection(new_lock);
+            free(new_lock);
+        }
+    }
+
+    /* At this point, we have a lock that can be synchronized on. We don't
+     * care which thread actually performed the allocation.
+     */
+
+    EnterCriticalSection(lock);
+
+    if (!done)
+    {
+        func();
+        done = 1;
+    }
+
+    LeaveCriticalSection(lock);
+
+    /* Last one out should free resources. The destructed objects are
+     * protected by checking if(done) above.
+     */
+    if(!InterlockedDecrement(&waiters))
+    {
+        DeleteCriticalSection(lock);
+        free(lock);
+        lock = NULL;
+    }
+}
+
+
+#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
+#include <pthread.h>
+static void once(void (*func)(void))
+{
+    static pthread_once_t lock = PTHREAD_ONCE_INIT;
+    pthread_once(&lock, func);
+}
+
+
+#else
+/* No-op version that performs no synchronization. vp8_rtcd() is idempotent,
+ * so as long as your platform provides atomic loads/stores of pointers
+ * no synchronization is strictly necessary.
+ */
+
+static void once(void (*func)(void))
+{
+    static int done;
+
+    if(!done)
+    {
+        func();
+        done = 1;
+    }
+}
+#endif
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 7de85ca..584ad4b 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -20,7 +20,7 @@
 /****************************************************************************
 *  Header Files
 ****************************************************************************/
-#include "./vpx_rtcd.h"
+#include "./vpx_scale_rtcd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/yv12config.h"
 #include "vpx_scale/scale_mode.h"
diff --git a/vpx_scale/rtcd.c b/vpx_scale/rtcd.c
new file mode 100644
index 0000000..656a22f
--- /dev/null
+++ b/vpx_scale/rtcd.c
@@ -0,0 +1,18 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+#define RTCD_C
+#include "vpx_scale_rtcd.h"
+#include "vpx_ports/vpx_once.h"
+
+void vpx_scale_rtcd()
+{
+    once(setup_rtcd_internal);
+}
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index 378b917..6c93f9f 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -6,6 +6,7 @@
 SCALE_SRCS-yes += generic/yv12config.c
 SCALE_SRCS-yes += generic/yv12extend.c
 SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
+SCALE_SRCS-yes += rtcd.c
 
 #neon
 SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
@@ -15,3 +16,5 @@
 SCALE_SRCS-$(HAVE_NEON)  += arm/neon/yv12extend_arm.c
 
 SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
+
+$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh))
diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh
new file mode 100644
index 0000000..f709aa6
--- /dev/null
+++ b/vpx_scale/vpx_scale_rtcd.sh
@@ -0,0 +1,41 @@
+vpx_scale_forward_decls() {
+cat <<EOF
+struct yv12_buffer_config;
+EOF
+}
+forward_decls vpx_scale_forward_decls
+
+# Scaler functions
+if [ "CONFIG_SPATIAL_RESAMPLING" != "yes" ]; then
+    prototype void vp8_horizontal_line_4_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_last_vertical_band_4_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_2_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_last_vertical_band_2_3_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_3_5_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_last_vertical_band_3_5_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_3_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_last_vertical_band_3_4_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_1_2_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_last_vertical_band_1_2_scale "unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"
+    prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+    prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"
+fi
+
+prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf"
+specialize vp8_yv12_extend_frame_borders neon
+
+prototype void vp8_yv12_copy_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_frame neon
+
+prototype void vp8_yv12_copy_y "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"
+specialize vp8_yv12_copy_y neon
diff --git a/vpxdec.c b/vpxdec.c
index 44a80e3..aa3f342 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -49,6 +49,7 @@
 static const char *exec_name;
 
 #define VP8_FOURCC (0x00385056)
+#define VP9_FOURCC (0x00395056)
 static const struct {
   char const *name;
   const vpx_codec_iface_t *(*iface)(void);
@@ -59,7 +60,7 @@
   {"vp8",  vpx_codec_vp8_dx,   VP8_FOURCC, 0x00FFFFFF},
 #endif
 #if CONFIG_VP9_DECODER
-  {"vp9",  vpx_codec_vp8_dx,   VP8_FOURCC, 0x00FFFFFF},
+  {"vp9",  vpx_codec_vp9_dx,   VP9_FOURCC, 0x00FFFFFF},
 #endif
 };
 
@@ -109,7 +110,7 @@
   NULL
 };
 
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER
 static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
                                                 "Enable VP8 postproc add noise");
 static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
@@ -142,7 +143,7 @@
   fprintf(stderr, "Usage: %s <options> filename\n\n"
           "Options:\n", exec_name);
   arg_show_usage(stderr, all_args);
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER
   fprintf(stderr, "\nVP8 Postprocessing Options:\n");
   arg_show_usage(stderr, vp8_pp_args);
 #endif
@@ -692,7 +693,7 @@
   unsigned int            fps_num;
   void                   *out = NULL;
   vpx_codec_dec_cfg_t     cfg = {0};
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER
   vp8_postproc_cfg_t      vp8_pp_cfg = {0};
   int                     vp8_dbg_color_ref_frame = 0;
   int                     vp8_dbg_color_mb_modes = 0;
@@ -752,7 +753,7 @@
     else if (arg_match(&arg, &verbosearg, argi))
       quiet = 0;
 
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER
     else if (arg_match(&arg, &addnoise_level, argi)) {
       postproc = 1;
       vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
@@ -924,7 +925,7 @@
   if (!quiet)
     fprintf(stderr, "%s\n", decoder.name);
 
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER
 
   if (vp8_pp_cfg.post_proc_flag
       && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
diff --git a/vpxenc.c b/vpxenc.c
index beb7030..68b7a5c 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -94,13 +94,13 @@
 } codecs[] = {
 #if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER
   {"vp8", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, 0x30385056},
-#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
+#elif CONFIG_VP8_ENCODER && !CONFIG_VP8_DECODER
   {"vp8", &vpx_codec_vp8_cx, NULL, 0x30385056},
 #endif
 #if CONFIG_VP9_ENCODER && CONFIG_VP9_DECODER
-  {"vp9", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, 0x30385056},
+  {"vp9", &vpx_codec_vp9_cx, &vpx_codec_vp9_dx, 0x30395056},
 #elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER
-  {"vp9", &vpx_codec_vp8_cx, NULL, 0x30385056},
+  {"vp9", &vpx_codec_vp9_cx, NULL, 0x30395056},
 #endif
 };
 
@@ -1058,22 +1058,14 @@
 };
 
 
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1,
                                             "Noise sensitivity (frames to blur)");
 static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1,
                                            "Filter sharpness (0-7)");
 static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1,
                                                "Motion detection threshold");
-#endif
-
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1,
                                           "CPU Used (-16..16)");
-#endif
-
-
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
                                              "Number of token partitions to use, log2");
 static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
@@ -1099,16 +1091,34 @@
 static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode");
 #endif
 
+#if CONFIG_VP8_ENCODER
 static const arg_def_t *vp8_args[] = {
   &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
   &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
   &tune_ssim, &cq_level, &max_intra_rate_pct,
+  NULL
+};
+static const int vp8_arg_ctrl_map[] = {
+  VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
+  VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
+  VP8E_SET_TOKEN_PARTITIONS,
+  VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
+  VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
+  0
+};
+#endif
+
+#if CONFIG_VP9_ENCODER
+static const arg_def_t *vp9_args[] = {
+  &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh,
+  &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
+  &tune_ssim, &cq_level, &max_intra_rate_pct,
 #if CONFIG_LOSSLESS
   &lossless,
 #endif
   NULL
 };
-static const int vp8_arg_ctrl_map[] = {
+static const int vp9_arg_ctrl_map[] = {
   VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
   VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
   VP8E_SET_TOKEN_PARTITIONS,
@@ -1139,10 +1149,14 @@
   arg_show_usage(stdout, rc_twopass_args);
   fprintf(stderr, "\nKeyframe Placement Options:\n");
   arg_show_usage(stdout, kf_args);
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#if CONFIG_VP8_ENCODER
   fprintf(stderr, "\nVP8 Specific Options:\n");
   arg_show_usage(stdout, vp8_args);
 #endif
+#if CONFIG_VP9_ENCODER
+  fprintf(stderr, "\nVP9 Specific Options:\n");
+  arg_show_usage(stdout, vp9_args);
+#endif
   fprintf(stderr, "\nStream timebase (--timebase):\n"
           "  The desired precision of timestamps in the output, expressed\n"
           "  in fractional seconds. Default is 1/1000.\n");
@@ -1456,8 +1470,15 @@
 
 
 #define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
+#define MAX(x,y) ((x)>(y)?(x):(y))
+#if CONFIG_VP8_ENCODER && !CONFIG_VP9_ENCODER
 #define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map)
-
+#elif !CONFIG_VP8_ENCODER && CONFIG_VP9_ENCODER
+#define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map)
+#else
+#define ARG_CTRL_CNT_MAX MAX(NELEMENTS(vp8_arg_ctrl_map), \
+                             NELEMENTS(vp9_arg_ctrl_map))
+#endif
 
 /* Configuration elements common to all streams */
 struct global_config {
@@ -1738,9 +1759,17 @@
   int                      eos_mark_found = 0;
 
   /* Handle codec specific options */
-  if (global->codec->iface == vpx_codec_vp8_cx) {
+  if (0) {
+#if CONFIG_VP8_ENCODER
+  } else if (global->codec->iface == vpx_codec_vp8_cx) {
     ctrl_args = vp8_args;
     ctrl_args_map = vp8_arg_ctrl_map;
+#endif
+#if CONFIG_VP9_ENCODER
+  } else if (global->codec->iface == vpx_codec_vp9_cx) {
+    ctrl_args = vp9_args;
+    ctrl_args_map = vp9_arg_ctrl_map;
+#endif
   }
 
   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {