Use CMake to build TF-Lite This enables Windows builds. Update minimum requirements in documentation. BUG=aomedia:2613 Change-Id: I8f66ad9e733d905b98c3448e58f10d100cbac142
diff --git a/.gitattributes b/.gitattributes index 0bb5faf..ffc6912 100644 --- a/.gitattributes +++ b/.gitattributes
@@ -16,8 +16,3 @@ *.pjt -crlf *.vcp -crlf *.inf -crlf -# fft2d is not available through git, and the code -# is checked in verbatim. Do not apply diffing to -# it, otherwise it fails the 'git diff HEAD^ --check' -# test. -third_party/tensorflow_dependencies/fft2d/** -diff
diff --git a/.gitmodules b/.gitmodules index fae903e..b1cb803 100644 --- a/.gitmodules +++ b/.gitmodules
@@ -1,27 +1,3 @@ [submodule "third_party/tensorflow"] path = third_party/tensorflow url = https://github.com/tensorflow/tensorflow.git -[submodule "third_party/tensorflow_dependencies/absl"] - path = third_party/tensorflow_dependencies/absl - url = https://github.com/abseil/abseil-cpp.git -[submodule "third_party/tensorflow_dependencies/neon_2_sse"] - path = third_party/tensorflow_dependencies/neon_2_sse - url = https://github.com/intel/ARM_NEON_2_x86_SSE.git -[submodule "third_party/tensorflow_dependencies/farmhash"] - path = third_party/tensorflow_dependencies/farmhash - url = https://github.com/google/farmhash -[submodule "third_party/tensorflow_dependencies/eigen"] - path = third_party/tensorflow_dependencies/eigen - url = https://gitlab.com/libeigen/eigen.git -[submodule "third_party/tensorflow_dependencies/flatbuffers"] - path = third_party/tensorflow_dependencies/flatbuffers - url = https://github.com/google/flatbuffers -[submodule "third_party/tensorflow_dependencies/gemmlowp"] - path = third_party/tensorflow_dependencies/gemmlowp - url = https://github.com/google/gemmlowp.git -[submodule "third_party/tensorflow_dependencies/fp16"] - path = third_party/tensorflow_dependencies/fp16 - url = https://github.com/Maratyszcza/FP16.git -[submodule "third_party/tensorflow_dependencies/ruy"] - path = third_party/tensorflow_dependencies/ruy - url = https://github.com/google/ruy.git
diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fb214d..2d96d6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -37,6 +37,7 @@ set(SO_VERSION 2) set(SO_FILE_VERSION 2.0.0) +include("${AOM_ROOT}/build/cmake/tensorflow_lite.cmake") include("${AOM_ROOT}/build/cmake/aom_configure.cmake") include("${AOM_ROOT}/aom_dsp/aom_dsp.cmake") include("${AOM_ROOT}/aom_mem/aom_mem.cmake") @@ -629,19 +630,19 @@ endif() if(CONFIG_TENSORFLOW_LITE) - include("${AOM_ROOT}/build/cmake/tensorflow_lite.cmake") - setup_tensorflow_lite() - - if(ENABLE_EXAMPLES) - add_executable(tf_lite_model "${AOM_ROOT}/examples/tf_lite_model.cc") - list(APPEND AOM_EXAMPLE_TARGETS tf_lite_model) - list(APPEND AOM_APP_TARGETS tf_lite_model) - add_dependencies(tf_lite_model tensorflowlite_a) - target_link_libraries( - tf_lite_model - PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/libtensorflow-lite.a" - PRIVATE Threads::Threads - PRIVATE ${CMAKE_DL_LIBS}) + set(tflite_supported 0) + is_tflite_supported(tflite_supported) + if(NOT ${tflite_supported}) + set(CONFIG_TENSORFLOW_LITE 0) + else() + setup_tensorflow_lite() + if(ENABLE_EXAMPLES) + add_executable(tf_lite_model "${AOM_ROOT}/examples/tf_lite_model.cc") + list(APPEND AOM_EXAMPLE_TARGETS tf_lite_model) + list(APPEND AOM_APP_TARGETS tf_lite_model) + target_link_tf_lite_libraries(tf_lite_model) + add_dependencies(tf_lite_model tensorflow_lite) + endif() endif() endif()
diff --git a/README.md b/README.md index c0fa329..2dbf5db 100644 --- a/README.md +++ b/README.md
@@ -46,7 +46,7 @@ ### Prerequisites {#prerequisites} - 1. [CMake](https://cmake.org) version 3.5 or higher. + 1. [CMake](https://cmake.org) version 3.16 or higher. 2. [Git](https://git-scm.com/). 3. [Perl](https://www.perl.org/). 4. For x86 targets, [yasm](http://yasm.tortall.net/), which is preferred, or a @@ -217,7 +217,7 @@ ### Microsoft Visual Studio builds {#microsoft-visual-studio-builds} Building the AV1 codec library in Microsoft Visual Studio is supported. Visual -Studio 2017 (15.0) or later is required. The following example demonstrates +Studio 2019 (16.7) or later is required. The following example demonstrates generating projects and a solution for the Microsoft IDE: ~~~ @@ -231,14 +231,6 @@ # To build with Visual Studio 2019 for the 32-bit x86 target: $ cmake path/to/aom -G "Visual Studio 16 2019" -A Win32 $ cmake --build . - - # To build with Visual Studio 2017 for the x64 target: - $ cmake path/to/aom -G "Visual Studio 15 2017" -T host=x64 -A x64 - $ cmake --build . - - # To build with Visual Studio 2017 for the 32-bit x86 target: - $ cmake path/to/aom -G "Visual Studio 15 2017" -T host=x64 - $ cmake --build . ~~~ NOTE: The build system targets Windows 7 or later by compiling files with
diff --git a/build/cmake/tensorflow_lite.cmake b/build/cmake/tensorflow_lite.cmake index 9fca969..5db1aee 100644 --- a/build/cmake/tensorflow_lite.cmake +++ b/build/cmake/tensorflow_lite.cmake
@@ -13,31 +13,22 @@ endif() # AOM_BUILD_CMAKE_TENSORFLOW_LITE_CMAKE_ set(AOM_BUILD_CMAKE_TENSORFLOW_LITE_CMAKE_ 1) +include(ExternalProject) include(FindGit) -# Checks if the dependencies on Tensorflow Lite are already checked out -- if -# not, uses the git submodule command to fetch them. -function(checkout_submodules_) +# Checks if Tensorflow has been checked out -- if not, uses the git submodule +# command to fetch it. +function(checkout_submodule_) # As a quick sanity check, see if at least 1 expected file or directory is # present in each submodule. If so, assume they are all checked out (if they # are not, then the base directory will be empty). - if( - (EXISTS "${AOM_ROOT}/third_party/tensorflow/tensorflow") - AND (EXISTS - "${AOM_ROOT}/third_party/tensorflow_dependencies/neon_2_sse/ReadMe.md") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/absl/absl") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/eigen/Eigen") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/farmhash/Makefile.am") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/flatbuffers/BUILD") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/fp16/CMakeLists.txt") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/gemmlowp/BUILD") - AND (EXISTS "${AOM_ROOT}/third_party/tensorflow_aom/ruy/BUILD")) + if(EXISTS "${AOM_ROOT}/third_party/tensorflow/tensorflow") return() endif() if(NOT GIT_FOUND) message( FATAL_ERROR - "Tensorflow-Lite/dependencies not present; " "git could not be found; " + "Tensorflow-Lite not present; " "git could not be found; " "please check out submodules with 'git submodule update --init'") endif() # Note that "git submodule update --init" must be run from inside the git @@ -55,43 +46,183 @@ endif() endfunction() -function(add_tensorflow_lite_dependency_) - if(NOT AOM_APP_TARGETS) - message(FATAL_ERROR "AOM_APP_TARGETS variable must not be empty.") +# Add the TF-lite link-related library to the named target (e.g., an executable +# or library). This function handles the different naming conventions of +# operating systems. +function(target_link_tf_lite_dep_ named_target subdir libname) + if(NOT (("${subdir}" STREQUAL "") OR ("${subdir}" MATCHES "/$"))) + message( + FATAL_ERROR "sub-directory must be empty or end with a slash: ${subdir}") endif() - # Build the library. - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/libtensorflow-lite.a" - COMMAND "${AOM_ROOT}/third_party/tensorflow_dependencies/build.pl" - "${AOM_ROOT}" "${CMAKE_CURRENT_BINARY_DIR}/libtensorflow-lite.a" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_target(tensorflowlite_a ALL - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libtensorflow-lite.a") - include_directories("${AOM_ROOT}/third_party/tensorflow") - include_directories( - "${AOM_ROOT}/third_party/tensorflow_dependencies/flatbuffers/include/") - # Add tensorflow-lite as a dependency on all AOM applications. - foreach(aom_app ${AOM_APP_TARGETS}) - add_dependencies(${aom_app} tensorflowlite_a) - target_link_libraries( - ${aom_app} - PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/libtensorflow-lite.a" - ${AOM_LIB_LINK_TYPE} Threads::Threads - PRIVATE ${CMAKE_DL_LIBS}) - endforeach() + + set(STATIC_LIBRARY_DIR "") + if(MSVC) + set(STATIC_LIBRARY_DIR "$<CONFIG>/") + endif() + target_link_libraries( + ${named_target} + PRIVATE + "${CMAKE_BINARY_DIR}/tensorflow_lite/${subdir}${STATIC_LIBRARY_DIR}${CMAKE_STATIC_LIBRARY_PREFIX}${libname}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) endfunction() -# If Tensorflow-Lite should be enabled, adds appropriate build rules / targets. -function(setup_tensorflow_lite) - if("${AOM_ROOT}" STREQUAL "") - message(FATAL_ERROR "AOM_ROOT variable must not be empty.") - endif() +# Add TF-lite libraries onto the named target at link time (e.g., an executable +# or library). For enabling TF-lite for experiments, prefer the +# "experiment_requires_tf_lite" function. +function(target_link_tf_lite_libraries named_target) + target_link_libraries(${named_target} ${AOM_LIB_LINK_TYPE} Threads::Threads) + target_link_libraries(${named_target} PRIVATE ${CMAKE_DL_LIBS}) + target_link_tf_lite_dep_(${named_target} "" tensorflow-lite) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags_internal) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags_registry) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags_config) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags_program_name) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/flags/ + absl_flags_marshalling) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/hash/ + absl_hash) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/hash/ + absl_city) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/status/ + absl_status) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/types/ + absl_bad_optional_access) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/strings/ + absl_cord) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/strings/ + absl_str_format_internal) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/synchronization/ + absl_synchronization) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/debugging/ + absl_stacktrace) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/debugging/ + absl_symbolize) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/debugging/ + absl_debugging_internal) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/debugging/ + absl_demangle_internal) + target_link_tf_lite_dep_(${named_target} + _deps/abseil-cpp-build/absl/synchronization/ + absl_graphcycles_internal) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_malloc_internal) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/time/ + absl_time) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/strings/ + absl_strings) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/strings/ + absl_strings_internal) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_throw_delegate) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_base) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_dynamic_annotations) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_spinlock_wait) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/numeric/ + absl_int128) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/time/ + absl_civil_time) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/time/ + absl_time_zone) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/types/ + absl_bad_variant_access) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_raw_logging_internal) + target_link_tf_lite_dep_(${named_target} _deps/abseil-cpp-build/absl/base/ + absl_log_severity) + target_link_tf_lite_dep_(${named_target} _deps/farmhash-build/ farmhash) + target_link_tf_lite_dep_(${named_target} _deps/fft2d-build/ fft2d_fftsg2d) + target_link_tf_lite_dep_(${named_target} _deps/fft2d-build/ fft2d_fftsg) + target_link_tf_lite_dep_(${named_target} _deps/flatbuffers-build/ flatbuffers) + target_link_tf_lite_dep_(${named_target} _deps/ruy-build/ ruy) +endfunction() + +# Can Tensorflow-Lite be enabled with the current build system? Sets the +# variable with either 0 or 1 as the value. If 0, also prints an explanatory +# message. +function(is_tflite_supported result) # Cross-compile is not currently implemented. if(CMAKE_TOOLCHAIN_FILE) message("TOOLCHAIN: ${CMAKE_TOOLCHAIN_FILE}") message(WARNING "No cross-compile support for TensorFlow Lite; disabling") - set(CONFIG_TENSORFLOW_LITE 0) + set(${result} 0 PARENT_SCOPE) + return() endif() - checkout_submodules_() - add_tensorflow_lite_dependency_() + # TF-Lite specifies a minimum CMake version of 3.16, but Jenkins uses 3.7.2. + # Until Jenkins is upgraded, disable TF-Lite if a lower version of CMake is + # detected. + if(${CMAKE_VERSION} VERSION_LESS "3.16") + message( + WARNING "Tensorflow Lite requres CMake version 3.16 or higher; version " + "${CMAKE_VERSION} detected; disabling") + set(${result} 0 PARENT_SCOPE) + return() + endif() + set(${result} 1 PARENT_SCOPE) +endfunction() + +# Adds appropriate build rules / targets. Only invoke this function if +# is_tf_lite_supported returns true. +function(setup_tensorflow_lite) + if("${AOM_ROOT}" STREQUAL "") + message(FATAL_ERROR "AOM_ROOT variable must not be empty.") + endif() + + if(MSVC) + add_compile_definitions(NOMINMAX=1) + endif() + checkout_submodule_() + + # Allow code to reference TF. + include_directories("${AOM_ROOT}/third_party/tensorflow") + + externalproject_add( + tensorflow_lite + SOURCE_DIR "${AOM_ROOT}/third_party/tensorflow/tensorflow/lite" + PREFIX "${CMAKE_BINARY_DIR}/tensorflow_lite" + BINARY_DIR "${CMAKE_BINARY_DIR}/tensorflow_lite" + DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/tensorflow_lite" + LOG_BUILD 1) + + # TF-Lite depends on this, and downloads it during compilation. + include_directories( + "${CMAKE_CURRENT_BINARY_DIR}/tensorflow_lite/flatbuffers/include/") + + add_dependencies(aom_av1_common tensorflow_lite) + foreach(aom_app ${AOM_APP_TARGETS}) + add_dependencies(${aom_app} tensorflow_lite) + target_link_tf_lite_libraries(${aom_app}) + endforeach() +endfunction() + +# Signal that the experiment needs TF-lite enabled. +function(experiment_requires_tf_lite experiment_name) + # Experiment is not enabled, no need to include TF-Lite in the build. + if(NOT ${${experiment_name}}) + return() + endif() + set(supported 0) + is_tflite_supported(supported) + if(NOT ${supported}) + # Disable the experiment so Gerrit will not test this case. + message(WARNING "Disabling ${experiment_name}.") + set(${experiment_name} 0 PARENT_SCOPE) + set(CONFIG_TENSORFLOW_LITE 0 PARENT_SCOPE) + return() + endif() + # Otherwise, enable TF-lite. + set(CONFIG_TENSORFLOW_LITE 1 PARENT_SCOPE) endfunction()
diff --git a/third_party/tensorflow b/third_party/tensorflow index 18445b0..ce0d0a4 160000 --- a/third_party/tensorflow +++ b/third_party/tensorflow
@@ -1 +1 @@ -Subproject commit 18445b0e39b677a21c86b4cf3d2bcb823f27e3e2 +Subproject commit ce0d0a458c87c8c3c541d56bfbea85455e4c7003
diff --git a/third_party/tensorflow_dependencies/absl b/third_party/tensorflow_dependencies/absl deleted file mode 160000 index 3c2bed2..0000000 --- a/third_party/tensorflow_dependencies/absl +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit 3c2bed2e77a8b77ce3092e3f75140dd21879bdba
diff --git a/third_party/tensorflow_dependencies/build.pl b/third_party/tensorflow_dependencies/build.pl deleted file mode 100755 index dcf14e5..0000000 --- a/third_party/tensorflow_dependencies/build.pl +++ /dev/null
@@ -1,160 +0,0 @@ -#!/usr/bin/env perl -# -# Copyright (c) 2020, Alliance for Open Media. All rights reserved -# -# This source code is subject to the terms of the BSD 2 Clause License and -# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -# was not distributed with this source code in the LICENSE file, you can -# obtain it at www.aomedia.org/license/software. If the Alliance for Open -# Media Patent License 1.0 was not distributed with this source code in the -# PATENTS file, you can obtain it at www.aomedia.org/license/patent. -# -########################################################################### -# -# Script to build the static TensorFlow lite library. -# -# Tensorflow's build process generates the static libraries in the same -# directory as the source code. AOM, however, generates binaries/objects/etc. -# in a different directory. This script: -# -# 1.) Copies the TensorFlow code to a temporary directory -# 2.) Copies the necessary dependencies into the temporary directory -# 3.) Compiles it -# 4.) Copies the static library to the AOM build directory -# -# Note that we do not use the download_dependencies.sh script directly, as -# it downloads directly into the source directory. - -use strict; -use warnings; -use autodie; -use Cwd; -use File::Basename; -use File::Copy; -use File::Spec::Functions; -use File::Temp; - -# Apply a tweak to Eigen (TF Lite's "download_dependencies.sh" -# performs 3 tweaks, but only 1 of them is relevant). -sub apply_eigen_tweak { - my $downloads_dir = $_[0]; - my $file = catfile($downloads_dir, "eigen", "Eigen", "src", "Core", - "arch", "NEON", "Complex.h"); - open(my $fh, '<', $file); - my @lines = <$fh>; - close($fh); - open($fh, '>', $file); - foreach my $line (@lines) { - $line =~ s/static uint64x2_t p2ul_CONJ_XOR = vld1q_u64\( p2ul_conj_XOR_DATA \);/static uint64x2_t p2ul_CONJ_XOR;/; - print $fh $line; - } - close($fh); -} - -# Pure Perl implementation of recursive directory-copy - -# File::Copy::Recursive is not part of core. -sub recursive_dircopy { - my $input_dir = $_[0]; - my $output_dir = $_[1]; - # If the output directory does not exist, create it. - if (!(-d $output_dir)) { - mkdir $output_dir; - } - opendir(my $dh, $input_dir); - while (my $file = readdir($dh)) { - # Ignore . and .. entries. - next if ($file eq "." or $file eq ".."); - my $in_file = catfile($input_dir, $file); - my $out_file = catfile($output_dir, $file); - # If it is a directory, recursively call. - if (-d $in_file) { - recursive_dircopy($in_file, $out_file); - } else { - # If a file, copy over. - copy($in_file, $out_file); - # Preserve execute permission. - if (-x $in_file) { - chmod 0755, $out_file; - } - } - } - closedir($dh); -} - -# Finds the first instance of the file in the directory, using a depth-first -# search. Returns "" if unable to find. -sub find_file { - my $source_dir = $_[0]; - my $fname = $_[1]; - opendir(my $dh, $source_dir); - while (my $file = readdir($dh)) { - # Ignore . and .. entries. - next if ($file eq "." or $file eq ".."); - if (-d catfile($source_dir, $file)) { - my $result = find_file(catfile($source_dir, $file), $fname); - if ($result ne "") { - closedir($dh); - return $result; - } - } elsif ($file eq $fname) { - closedir($dh); - return catfile($source_dir, $file); - } - } - closedir($dh); - return ""; -} - -sub copy_tensorflow_lite_dependencies { - my $source_dir = $_[0]; - my $output_dir = $_[1]; - my $dependencies_dir = catfile($source_dir, "third_party", - "tensorflow_dependencies"); - opendir(my $dh, $dependencies_dir); - while (my $file = readdir($dh)) { - # Ignore . and .. entries. - next if ($file eq "." or $file eq ".."); - # Ignore non-directories. - next unless (-d catfile($dependencies_dir, $file)); - # Copy the directory. - print " * $file\n"; - recursive_dircopy( - catfile($dependencies_dir, $file), - catfile($output_dir, $file)); - } - closedir($dh); -} - -# Start of program logic. -if ($#ARGV + 1 != 2) { - my $prog = basename($0); - warn("Usage: $prog <source directory> <output directory>\n"); - exit(1); -} -my ($source_dir, $output_dir) = @ARGV; - -my $temp_dir = File::Temp::tempdir( CLEANUP => 1 ); - -print "Copying TensorFlow code to temporary directory for building...\n"; -my $tf_dir = catfile($source_dir, "third_party", "tensorflow"); -recursive_dircopy($tf_dir, $temp_dir); - -my $downloads_dir = catfile($temp_dir, "tensorflow", "lite", "tools", - "make", "downloads"); -mkdir $downloads_dir; -print "Copying TensorFlow Lite dependencies to temporary directory...\n"; -copy_tensorflow_lite_dependencies($source_dir, $downloads_dir); -apply_eigen_tweak($downloads_dir); - -print "Building TensorFlow Lite...\n"; -my $build_sh = catfile($temp_dir, "tensorflow", "lite", "tools", "make", - "build_lib.sh"); -`$build_sh`; -# Find the libtensorflow-lite.a file, which is generated under -# tensorflow/lite/tools/make/gen (but in a different sub-directory depending -# on the architecture). -my $gen_dir = catfile($temp_dir, "tensorflow", "lite", "tools", "make", "gen"); -my $liblite = find_file($gen_dir, "libtensorflow-lite.a"); -$liblite ne "" or die("Unable to find libtensorflow-lite.a"); -print "Copying static library into build directory...\n"; -copy($liblite, $output_dir);
diff --git a/third_party/tensorflow_dependencies/eigen b/third_party/tensorflow_dependencies/eigen deleted file mode 160000 index b11f817..0000000 --- a/third_party/tensorflow_dependencies/eigen +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit b11f817bcff04276f3024d6780f56a137968b81a
diff --git a/third_party/tensorflow_dependencies/farmhash b/third_party/tensorflow_dependencies/farmhash deleted file mode 160000 index 0d859a8..0000000 --- a/third_party/tensorflow_dependencies/farmhash +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit 0d859a811870d10f53a594927d0d0b97573ad06d
diff --git a/third_party/tensorflow_dependencies/fft2d/alloc.c b/third_party/tensorflow_dependencies/fft2d/alloc.c deleted file mode 100644 index 7833d88..0000000 --- a/third_party/tensorflow_dependencies/fft2d/alloc.c +++ /dev/null
@@ -1,153 +0,0 @@ -/* ---- memory allocation ---- */ -#include "alloc.h" - - -#define alloc_error_check(p) { \ - if ((p) == NULL) { \ - fprintf(stderr, "Allocation Failure!\n"); \ - exit(1); \ - } \ -} - - -int *alloc_1d_int(int n1) -{ - int *i; - - i = (int *) malloc(sizeof(int) * n1); - alloc_error_check(i); - return i; -} - - -void free_1d_int(int *i) -{ - free(i); -} - - -double *alloc_1d_double(int n1) -{ - double *d; - - d = (double *) malloc(sizeof(double) * n1); - alloc_error_check(d); - return d; -} - - -void free_1d_double(double *d) -{ - free(d); -} - - -int **alloc_2d_int(int n1, int n2) -{ - int **ii, *i; - int j; - - ii = (int **) malloc(sizeof(int *) * n1); - alloc_error_check(ii); - i = (int *) malloc(sizeof(int) * n1 * n2); - alloc_error_check(i); - ii[0] = i; - for (j = 1; j < n1; j++) { - ii[j] = ii[j - 1] + n2; - } - return ii; -} - - -void free_2d_int(int **ii) -{ - free(ii[0]); - free(ii); -} - - -double **alloc_2d_double(int n1, int n2) -{ - double **dd, *d; - int j; - - dd = (double **) malloc(sizeof(double *) * n1); - alloc_error_check(dd); - d = (double *) malloc(sizeof(double) * n1 * n2); - alloc_error_check(d); - dd[0] = d; - for (j = 1; j < n1; j++) { - dd[j] = dd[j - 1] + n2; - } - return dd; -} - - -void free_2d_double(double **dd) -{ - free(dd[0]); - free(dd); -} - - -int ***alloc_3d_int(int n1, int n2, int n3) -{ - int ***iii, **ii, *i; - int j; - - iii = (int ***) malloc(sizeof(int **) * n1); - alloc_error_check(iii); - ii = (int **) malloc(sizeof(int *) * n1 * n2); - alloc_error_check(ii); - iii[0] = ii; - for (j = 1; j < n1; j++) { - iii[j] = iii[j - 1] + n2; - } - i = (int *) malloc(sizeof(int) * n1 * n2 * n3); - alloc_error_check(i); - ii[0] = i; - for (j = 1; j < n1 * n2; j++) { - ii[j] = ii[j - 1] + n3; - } - return iii; -} - - -void free_3d_int(int ***iii) -{ - free(iii[0][0]); - free(iii[0]); - free(iii); -} - - -double ***alloc_3d_double(int n1, int n2, int n3) -{ - double ***ddd, **dd, *d; - int j; - - ddd = (double ***) malloc(sizeof(double **) * n1); - alloc_error_check(ddd); - dd = (double **) malloc(sizeof(double *) * n1 * n2); - alloc_error_check(dd); - ddd[0] = dd; - for (j = 1; j < n1; j++) { - ddd[j] = ddd[j - 1] + n2; - } - d = (double *) malloc(sizeof(double) * n1 * n2 * n3); - alloc_error_check(d); - dd[0] = d; - for (j = 1; j < n1 * n2; j++) { - dd[j] = dd[j - 1] + n3; - } - return ddd; -} - - -void free_3d_double(double ***ddd) -{ - free(ddd[0][0]); - free(ddd[0]); - free(ddd); -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/alloc.h b/third_party/tensorflow_dependencies/fft2d/alloc.h deleted file mode 100644 index 3467cc4..0000000 --- a/third_party/tensorflow_dependencies/fft2d/alloc.h +++ /dev/null
@@ -1,20 +0,0 @@ -/* ---- memory allocation ---- */ - - -#include <stdlib.h> -#include <stdio.h> - - -int *alloc_1d_int(int n1); -void free_1d_int(int *i); -double *alloc_1d_double(int n1); -void free_1d_double(double *d); -int **alloc_2d_int(int n1, int n2); -void free_2d_int(int **ii); -double **alloc_2d_double(int n1, int n2); -void free_2d_double(double **dd); -int ***alloc_3d_int(int n1, int n2, int n3); -void free_3d_int(int ***iii); -double ***alloc_3d_double(int n1, int n2, int n3); -void free_3d_double(double ***ddd); -
diff --git a/third_party/tensorflow_dependencies/fft2d/fft4f2d.c b/third_party/tensorflow_dependencies/fft2d/fft4f2d.c deleted file mode 100644 index 354c80e..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fft4f2d.c +++ /dev/null
@@ -1,1705 +0,0 @@ -/* -Fast Fourier/Cosine/Sine Transform - dimension :two - data length :power of 2 - decimation :frequency - radix :4, 2, row-column - data :inplace - table :use -functions - cdft2d: Complex Discrete Fourier Transform - rdft2d: Real Discrete Fourier Transform - ddct2d: Discrete Cosine Transform - ddst2d: Discrete Sine Transform -function prototypes - void cdft2d(int, int, int, double **, int *, double *); - void rdft2d(int, int, int, double **, int *, double *); - void ddct2d(int, int, int, double **, double **, int *, double *); - void ddst2d(int, int, int, double **, double **, int *, double *); - - --------- Complex DFT (Discrete Fourier Transform) -------- - [definition] - <case1> - X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * - exp(2*pi*i*j1*k1/n1) * - exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 - <case2> - X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * - exp(-2*pi*i*j1*k1/n1) * - exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 - (notes: sum_j=0^n-1 is a summation from j=0 to n-1) - [usage] - <case1> - ip[0] = 0; // first time only - cdft2d(n1, 2*n2, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - cdft2d(n1, 2*n2, -1, a, ip, w); - [parameters] - n1 :data length (int) - n1 >= 1, n1 = power of 2 - 2*n2 :data length (int) - n2 >= 1, n2 = power of 2 - a[0...n1-1][0...2*n2-1] - :input/output data (double **) - input data - a[j1][2*j2] = Re(x[j1][j2]), - a[j1][2*j2+1] = Im(x[j1][j2]), - 0<=j1<n1, 0<=j2<n2 - output data - a[k1][2*k2] = Re(X[k1][k2]), - a[k1][2*k2+1] = Im(X[k1][k2]), - 0<=k1<n1, 0<=k2<n2 - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - cdft2d(n1, 2*n2, -1, a, ip, w); - is - cdft2d(n1, 2*n2, 1, a, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { - a[j1][j2] *= 1.0 / (n1 * n2); - } - } - . - - --------- Real DFT / Inverse of Real DFT -------- - [definition] - <case1> RDFT - R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), - 0<=k1<n1, 0<=k2<n2 - I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> IRDFT (excluding scale) - a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 - (R[j1][j2] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + - I[j1][j2] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), - 0<=k1<n1, 0<=k2<n2 - (notes: R[n1-k1][n2-k2] = R[k1][k2], - I[n1-k1][n2-k2] = -I[k1][k2], - R[n1-k1][0] = R[k1][0], - I[n1-k1][0] = -I[k1][0], - R[0][n2-k2] = R[0][k2], - I[0][n2-k2] = -I[0][k2], - 0<k1<n1, 0<k2<n2) - [usage] - <case1> - ip[0] = 0; // first time only - rdft2d(n1, n2, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - rdft2d(n1, n2, -1, a, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - <case1> - output data - a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], - a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], - 0<k1<n1, 0<k2<n2/2, - a[0][2*k2] = R[0][k2] = R[0][n2-k2], - a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2], - 0<k2<n2/2, - a[k1][0] = R[k1][0] = R[n1-k1][0], - a[k1][1] = I[k1][0] = -I[n1-k1][0], - a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2], - a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2], - 0<k1<n1/2, - a[0][0] = R[0][0], - a[0][1] = R[0][n2/2], - a[n1/2][0] = R[n1/2][0], - a[n1/2][1] = R[n1/2][n2/2] - <case2> - input data - a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], - a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], - 0<j1<n1, 0<j2<n2/2, - a[0][2*j2] = R[0][j2] = R[0][n2-j2], - a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2], - 0<j2<n2/2, - a[j1][0] = R[j1][0] = R[n1-j1][0], - a[j1][1] = I[j1][0] = -I[n1-j1][0], - a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2], - a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2], - 0<j1<n1/2, - a[0][0] = R[0][0], - a[0][1] = R[0][n2/2], - a[n1/2][0] = R[n1/2][0], - a[n1/2][1] = R[n1/2][n2/2] - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/4) + n2/4 - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - rdft2d(n1, n2, 1, a, ip, w); - is - rdft2d(n1, n2, -1, a, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 2.0 / (n1 * n2); - } - } - . - - --------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> IDCT (excluding scale) - C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(pi*j1*(k1+1/2)/n1) * - cos(pi*j2*(k2+1/2)/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> DCT - C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(pi*(j1+1/2)*k1/n1) * - cos(pi*(j2+1/2)*k2/n2), - 0<=k1<n1, 0<=k2<n2 - [usage] - <case1> - ip[0] = 0; // first time only - ddct2d(n1, n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddct2d(n1, n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - output data - a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2 - t[0...n1-1][0...n2-1] - :work area (double **) - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/4) + max(n1, n2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddct2d(n1, n2, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - a[j1][0] *= 0.5; - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[0][j2] *= 0.5; - } - ddct2d(n1, n2, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 4.0 / (n1 * n2); - } - } - . - - --------- DST (Discrete Sine Transform) / Inverse of DST -------- - [definition] - <case1> IDST (excluding scale) - S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * - sin(pi*j1*(k1+1/2)/n1) * - sin(pi*j2*(k2+1/2)/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> DST - S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - sin(pi*(j1+1/2)*k1/n1) * - sin(pi*(j2+1/2)*k2/n2), - 0<k1<=n1, 0<k2<=n2 - [usage] - <case1> - ip[0] = 0; // first time only - ddst2d(n1, n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddst2d(n1, n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - <case1> - input data - a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2, - a[j1][0] = A[j1][n2], 0<j1<n1, - a[0][j2] = A[n1][j2], 0<j2<n2, - a[0][0] = A[n1][n2] - (i.e. A[j1][j2] = a[j1 % n1][j2 % n2]) - output data - a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2 - <case2> - output data - a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2, - a[k1][0] = S[k1][n2], 0<k1<n1, - a[0][k2] = S[n1][k2], 0<k2<n2, - a[0][0] = S[n1][n2] - (i.e. S[k1][k2] = a[k1 % n1][k2 % n2]) - t[0...n1-1][0...n2-1] - :work area (double **) - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/4) + max(n1, n2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddst2d(n1, n2, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - a[j1][0] *= 0.5; - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[0][j2] *= 0.5; - } - ddst2d(n1, n2, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 4.0 / (n1 * n2); - } - } - . -*/ - - -void cdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void bitrv2col(int n1, int n, int *ip, double **a); - void bitrv2row(int n, int n2, int *ip, double **a); - void cftbcol(int n1, int n, double **a, double *w); - void cftbrow(int n, int n2, double **a, double *w); - void cftfcol(int n1, int n, double **a, double *w); - void cftfrow(int n, int n2, double **a, double *w); - int n; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - if (n > (ip[0] << 2)) { - makewt(n >> 2, ip, w); - } - if (n2 > 4) { - bitrv2col(n1, n2, ip + 2, a); - } - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, a); - } - if (isgn < 0) { - cftfcol(n1, n2, a, w); - cftfrow(n1, n2, a, w); - } else { - cftbcol(n1, n2, a, w); - cftbrow(n1, n2, a, w); - } -} - - -void rdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void bitrv2col(int n1, int n, int *ip, double **a); - void bitrv2row(int n, int n2, int *ip, double **a); - void cftbcol(int n1, int n, double **a, double *w); - void cftbrow(int n, int n2, double **a, double *w); - void cftfcol(int n1, int n, double **a, double *w); - void cftfrow(int n, int n2, double **a, double *w); - void rftbcol(int n1, int n, double **a, int nc, double *c); - void rftfcol(int n1, int n, double **a, int nc, double *c); - int n, nw, nc, n1h, i, j; - double xi; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n2 > (nc << 2)) { - nc = n2 >> 2; - makect(nc, ip, w + nw); - } - n1h = n1 >> 1; - if (isgn < 0) { - for (i = 1; i <= n1h - 1; i++) { - j = n1 - i; - xi = a[i][0] - a[j][0]; - a[i][0] += a[j][0]; - a[j][0] = xi; - xi = a[j][1] - a[i][1]; - a[i][1] += a[j][1]; - a[j][1] = xi; - } - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, a); - } - cftfrow(n1, n2, a, w); - for (i = 0; i <= n1 - 1; i++) { - a[i][1] = 0.5 * (a[i][0] - a[i][1]); - a[i][0] -= a[i][1]; - } - if (n2 > 4) { - rftfcol(n1, n2, a, nc, w + nw); - bitrv2col(n1, n2, ip + 2, a); - } - cftfcol(n1, n2, a, w); - } else { - if (n2 > 4) { - bitrv2col(n1, n2, ip + 2, a); - } - cftbcol(n1, n2, a, w); - if (n2 > 4) { - rftbcol(n1, n2, a, nc, w + nw); - } - for (i = 0; i <= n1 - 1; i++) { - xi = a[i][0] - a[i][1]; - a[i][0] += a[i][1]; - a[i][1] = xi; - } - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, a); - } - cftbrow(n1, n2, a, w); - for (i = 1; i <= n1h - 1; i++) { - j = n1 - i; - a[j][0] = 0.5 * (a[i][0] - a[j][0]); - a[i][0] -= a[j][0]; - a[j][1] = 0.5 * (a[i][1] + a[j][1]); - a[i][1] -= a[j][1]; - } - } -} - - -void ddct2d(int n1, int n2, int isgn, double **a, double **t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void bitrv2col(int n1, int n, int *ip, double **a); - void bitrv2row(int n, int n2, int *ip, double **a); - void cftbcol(int n1, int n, double **a, double *w); - void cftbrow(int n, int n2, double **a, double *w); - void cftfcol(int n1, int n, double **a, double *w); - void cftfrow(int n, int n2, double **a, double *w); - void rftbcol(int n1, int n, double **a, int nc, double *c); - void rftfcol(int n1, int n, double **a, int nc, double *c); - void dctbsub(int n1, int n2, double **a, int nc, double *c); - void dctfsub(int n1, int n2, double **a, int nc, double *c); - int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; - double xi; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n1 > nc || n2 > nc) { - if (n1 > n2) { - nc = n1; - } else { - nc = n2; - } - makect(nc, ip, w + nw); - } - n1h = n1 >> 1; - n2h = n2 >> 1; - if (isgn >= 0) { - for (i = 0; i <= n1 - 1; i++) { - for (j = 1; j <= n2h - 1; j++) { - jx = j << 1; - t[i][jx] = a[i][j]; - t[i][jx + 1] = a[i][n2 - j]; - } - } - t[0][0] = a[0][0]; - t[0][1] = a[0][n2h]; - t[n1h][0] = a[n1h][0]; - t[n1h][1] = a[n1h][n2h]; - for (i = 1; i <= n1h - 1; i++) { - ic = n1 - i; - t[i][0] = a[i][0]; - t[ic][1] = a[i][n2h]; - t[i][1] = a[ic][0]; - t[ic][0] = a[ic][n2h]; - } - dctfsub(n1, n2, t, nc, w + nw); - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, t); - } - cftfrow(n1, n2, t, w); - for (i = 0; i <= n1 - 1; i++) { - t[i][1] = 0.5 * (t[i][0] - t[i][1]); - t[i][0] -= t[i][1]; - } - if (n2 > 4) { - rftfcol(n1, n2, t, nc, w + nw); - bitrv2col(n1, n2, ip + 2, t); - } - cftfcol(n1, n2, t, w); - for (i = 0; i <= n1h - 1; i++) { - ix = i << 1; - ic = n1 - 1 - i; - for (j = 0; j <= n2h - 1; j++) { - jx = j << 1; - jc = n2 - 1 - j; - a[ix][jx] = t[i][j]; - a[ix][jx + 1] = t[i][jc]; - a[ix + 1][jx] = t[ic][j]; - a[ix + 1][jx + 1] = t[ic][jc]; - } - } - } else { - for (i = 0; i <= n1h - 1; i++) { - ix = i << 1; - ic = n1 - 1 - i; - for (j = 0; j <= n2h - 1; j++) { - jx = j << 1; - jc = n2 - 1 - j; - t[i][j] = a[ix][jx]; - t[i][jc] = a[ix][jx + 1]; - t[ic][j] = a[ix + 1][jx]; - t[ic][jc] = a[ix + 1][jx + 1]; - } - } - if (n2 > 4) { - bitrv2col(n1, n2, ip + 2, t); - } - cftbcol(n1, n2, t, w); - if (n2 > 4) { - rftbcol(n1, n2, t, nc, w + nw); - } - for (i = 0; i <= n1 - 1; i++) { - xi = t[i][0] - t[i][1]; - t[i][0] += t[i][1]; - t[i][1] = xi; - } - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, t); - } - cftbrow(n1, n2, t, w); - dctbsub(n1, n2, t, nc, w + nw); - for (i = 0; i <= n1 - 1; i++) { - for (j = 1; j <= n2h - 1; j++) { - jx = j << 1; - a[i][j] = t[i][jx]; - a[i][n2 - j] = t[i][jx + 1]; - } - } - a[0][0] = t[0][0]; - a[0][n2h] = t[0][1]; - a[n1h][0] = t[n1h][0]; - a[n1h][n2h] = t[n1h][1]; - for (i = 1; i <= n1h - 1; i++) { - ic = n1 - i; - a[i][0] = t[i][0]; - a[i][n2h] = t[ic][1]; - a[ic][0] = t[i][1]; - a[ic][n2h] = t[ic][0]; - } - } -} - - -void ddst2d(int n1, int n2, int isgn, double **a, double **t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void bitrv2col(int n1, int n, int *ip, double **a); - void bitrv2row(int n, int n2, int *ip, double **a); - void cftbcol(int n1, int n, double **a, double *w); - void cftbrow(int n, int n2, double **a, double *w); - void cftfcol(int n1, int n, double **a, double *w); - void cftfrow(int n, int n2, double **a, double *w); - void rftbcol(int n1, int n, double **a, int nc, double *c); - void rftfcol(int n1, int n, double **a, int nc, double *c); - void dstbsub(int n1, int n2, double **a, int nc, double *c); - void dstfsub(int n1, int n2, double **a, int nc, double *c); - int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; - double xi; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n1 > nc || n2 > nc) { - if (n1 > n2) { - nc = n1; - } else { - nc = n2; - } - makect(nc, ip, w + nw); - } - n1h = n1 >> 1; - n2h = n2 >> 1; - if (isgn >= 0) { - for (i = 0; i <= n1 - 1; i++) { - for (j = 1; j <= n2h - 1; j++) { - jx = j << 1; - t[i][jx] = a[i][j]; - t[i][jx + 1] = a[i][n2 - j]; - } - } - t[0][0] = a[0][0]; - t[0][1] = a[0][n2h]; - t[n1h][0] = a[n1h][0]; - t[n1h][1] = a[n1h][n2h]; - for (i = 1; i <= n1h - 1; i++) { - ic = n1 - i; - t[i][0] = a[i][0]; - t[ic][1] = a[i][n2h]; - t[i][1] = a[ic][0]; - t[ic][0] = a[ic][n2h]; - } - dstfsub(n1, n2, t, nc, w + nw); - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, t); - } - cftfrow(n1, n2, t, w); - for (i = 0; i <= n1 - 1; i++) { - t[i][1] = 0.5 * (t[i][0] - t[i][1]); - t[i][0] -= t[i][1]; - } - if (n2 > 4) { - rftfcol(n1, n2, t, nc, w + nw); - bitrv2col(n1, n2, ip + 2, t); - } - cftfcol(n1, n2, t, w); - for (i = 0; i <= n1h - 1; i++) { - ix = i << 1; - ic = n1 - 1 - i; - for (j = 0; j <= n2h - 1; j++) { - jx = j << 1; - jc = n2 - 1 - j; - a[ix][jx] = t[i][j]; - a[ix][jx + 1] = -t[i][jc]; - a[ix + 1][jx] = -t[ic][j]; - a[ix + 1][jx + 1] = t[ic][jc]; - } - } - } else { - for (i = 0; i <= n1h - 1; i++) { - ix = i << 1; - ic = n1 - 1 - i; - for (j = 0; j <= n2h - 1; j++) { - jx = j << 1; - jc = n2 - 1 - j; - t[i][j] = a[ix][jx]; - t[i][jc] = -a[ix][jx + 1]; - t[ic][j] = -a[ix + 1][jx]; - t[ic][jc] = a[ix + 1][jx + 1]; - } - } - if (n2 > 4) { - bitrv2col(n1, n2, ip + 2, t); - } - cftbcol(n1, n2, t, w); - if (n2 > 4) { - rftbcol(n1, n2, t, nc, w + nw); - } - for (i = 0; i <= n1 - 1; i++) { - xi = t[i][0] - t[i][1]; - t[i][0] += t[i][1]; - t[i][1] = xi; - } - if (n1 > 2) { - bitrv2row(n1, n2, ip + 2, t); - } - cftbrow(n1, n2, t, w); - dstbsub(n1, n2, t, nc, w + nw); - for (i = 0; i <= n1 - 1; i++) { - for (j = 1; j <= n2h - 1; j++) { - jx = j << 1; - a[i][j] = t[i][jx]; - a[i][n2 - j] = t[i][jx + 1]; - } - } - a[0][0] = t[0][0]; - a[0][n2h] = t[0][1]; - a[n1h][0] = t[n1h][0]; - a[n1h][n2h] = t[n1h][1]; - for (i = 1; i <= n1h - 1; i++) { - ic = n1 - i; - a[i][0] = t[i][0]; - a[i][n2h] = t[ic][1]; - a[ic][0] = t[i][1]; - a[ic][n2h] = t[ic][0]; - } - } -} - - -/* -------- initializing routines -------- */ - - -#include <math.h> - -void makewt(int nw, int *ip, double *w) -{ - void bitrv2(int n, int *ip, double *a); - int nwh, j; - double delta, x, y; - - ip[0] = nw; - ip[1] = 1; - if (nw > 2) { - nwh = nw >> 1; - delta = atan(1.0) / nwh; - w[0] = 1; - w[1] = 0; - w[nwh] = cos(delta * nwh); - w[nwh + 1] = w[nwh]; - for (j = 2; j <= nwh - 2; j += 2) { - x = cos(delta * j); - y = sin(delta * j); - w[j] = x; - w[j + 1] = y; - w[nw - j] = y; - w[nw - j + 1] = x; - } - bitrv2(nw, ip + 2, w); - } -} - - -void makect(int nc, int *ip, double *c) -{ - int nch, j; - double delta; - - ip[1] = nc; - if (nc > 1) { - nch = nc >> 1; - delta = atan(1.0) / nch; - c[0] = 0.5; - c[nch] = 0.5 * cos(delta * nch); - for (j = 1; j <= nch - 1; j++) { - c[j] = 0.5 * cos(delta * j); - c[nc - j] = 0.5 * sin(delta * j); - } - } -} - - -/* -------- child routines -------- */ - - -void bitrv2(int n, int *ip, double *a) -{ - int j, j1, k, k1, l, m, m2; - double xr, xi; - - ip[0] = 0; - l = n; - m = 1; - while ((m << 2) < l) { - l >>= 1; - for (j = 0; j <= m - 1; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - if ((m << 2) > l) { - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = (j << 1) + ip[k]; - k1 = (k << 1) + ip[j]; - xr = a[j1]; - xi = a[j1 + 1]; - a[j1] = a[k1]; - a[j1 + 1] = a[k1 + 1]; - a[k1] = xr; - a[k1 + 1] = xi; - } - } - } else { - m2 = m << 1; - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = (j << 1) + ip[k]; - k1 = (k << 1) + ip[j]; - xr = a[j1]; - xi = a[j1 + 1]; - a[j1] = a[k1]; - a[j1 + 1] = a[k1 + 1]; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += m2; - k1 += m2; - xr = a[j1]; - xi = a[j1 + 1]; - a[j1] = a[k1]; - a[j1 + 1] = a[k1 + 1]; - a[k1] = xr; - a[k1 + 1] = xi; - } - } - } -} - - -void bitrv2col(int n1, int n, int *ip, double **a) -{ - int i, j, j1, k, k1, l, m, m2; - double xr, xi; - - ip[0] = 0; - l = n; - m = 1; - while ((m << 2) < l) { - l >>= 1; - for (j = 0; j <= m - 1; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - if ((m << 2) > l) { - for (i = 0; i <= n1 - 1; i++) { - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = (j << 1) + ip[k]; - k1 = (k << 1) + ip[j]; - xr = a[i][j1]; - xi = a[i][j1 + 1]; - a[i][j1] = a[i][k1]; - a[i][j1 + 1] = a[i][k1 + 1]; - a[i][k1] = xr; - a[i][k1 + 1] = xi; - } - } - } - } else { - m2 = m << 1; - for (i = 0; i <= n1 - 1; i++) { - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = (j << 1) + ip[k]; - k1 = (k << 1) + ip[j]; - xr = a[i][j1]; - xi = a[i][j1 + 1]; - a[i][j1] = a[i][k1]; - a[i][j1 + 1] = a[i][k1 + 1]; - a[i][k1] = xr; - a[i][k1 + 1] = xi; - j1 += m2; - k1 += m2; - xr = a[i][j1]; - xi = a[i][j1 + 1]; - a[i][j1] = a[i][k1]; - a[i][j1 + 1] = a[i][k1 + 1]; - a[i][k1] = xr; - a[i][k1 + 1] = xi; - } - } - } - } -} - - -void bitrv2row(int n, int n2, int *ip, double **a) -{ - int i, j, j1, k, k1, l, m; - double xr, xi; - - ip[0] = 0; - l = n; - m = 1; - while ((m << 1) < l) { - l >>= 1; - for (j = 0; j <= m - 1; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - if ((m << 1) > l) { - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = j + ip[k]; - k1 = k + ip[j]; - for (i = 0; i <= n2 - 2; i += 2) { - xr = a[j1][i]; - xi = a[j1][i + 1]; - a[j1][i] = a[k1][i]; - a[j1][i + 1] = a[k1][i + 1]; - a[k1][i] = xr; - a[k1][i + 1] = xi; - } - } - } - } else { - for (k = 1; k <= m - 1; k++) { - for (j = 0; j <= k - 1; j++) { - j1 = j + ip[k]; - k1 = k + ip[j]; - for (i = 0; i <= n2 - 2; i += 2) { - xr = a[j1][i]; - xi = a[j1][i + 1]; - a[j1][i] = a[k1][i]; - a[j1][i + 1] = a[k1][i + 1]; - a[k1][i] = xr; - a[k1][i + 1] = xi; - } - j1 += m; - k1 += m; - for (i = 0; i <= n2 - 2; i += 2) { - xr = a[j1][i]; - xi = a[j1][i + 1]; - a[j1][i] = a[k1][i]; - a[j1][i + 1] = a[k1][i + 1]; - a[k1][i] = xr; - a[k1][i + 1] = xi; - } - } - } - } -} - - -void cftbcol(int n1, int n, double **a, double *w) -{ - int i, j, j1, j2, j3, k, k1, ks, l, m; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - for (i = 0; i <= n1 - 1; i++) { - l = 2; - while ((l << 1) < n) { - m = l << 2; - for (j = 0; j <= l - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - a[i][j2] = x0r - x2r; - a[i][j2 + 1] = x0i - x2i; - a[i][j1] = x1r - x3i; - a[i][j1 + 1] = x1i + x3r; - a[i][j3] = x1r + x3i; - a[i][j3 + 1] = x1i - x3r; - } - if (m < n) { - wk1r = w[2]; - for (j = m; j <= l + m - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - a[i][j2] = x2i - x0i; - a[i][j2 + 1] = x0r - x2r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[i][j1] = wk1r * (x0r - x0i); - a[i][j1 + 1] = wk1r * (x0r + x0i); - x0r = x3i + x1r; - x0i = x3r - x1i; - a[i][j3] = wk1r * (x0i - x0r); - a[i][j3 + 1] = wk1r * (x0i + x0r); - } - k1 = 1; - ks = -1; - for (k = (m << 1); k <= n - m; k += m) { - k1++; - ks = -ks; - wk1r = w[k1 << 1]; - wk1i = w[(k1 << 1) + 1]; - wk2r = ks * w[k1]; - wk2i = w[k1 + ks]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - for (j = k; j <= l + k - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[i][j2] = wk2r * x0r - wk2i * x0i; - a[i][j2 + 1] = wk2r * x0i + wk2i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[i][j1] = wk1r * x0r - wk1i * x0i; - a[i][j1 + 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[i][j3] = wk3r * x0r - wk3i * x0i; - a[i][j3 + 1] = wk3r * x0i + wk3i * x0r; - } - } - } - l = m; - } - if (l < n) { - for (j = 0; j <= l - 2; j += 2) { - j1 = j + l; - x0r = a[i][j] - a[i][j1]; - x0i = a[i][j + 1] - a[i][j1 + 1]; - a[i][j] += a[i][j1]; - a[i][j + 1] += a[i][j1 + 1]; - a[i][j1] = x0r; - a[i][j1 + 1] = x0i; - } - } - } -} - - -void cftbrow(int n, int n2, double **a, double *w) -{ - int i, j, j1, j2, j3, k, k1, ks, l, m; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - l = 1; - while ((l << 1) < n) { - m = l << 2; - for (j = 0; j <= l - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - a[j2][i] = x0r - x2r; - a[j2][i + 1] = x0i - x2i; - a[j1][i] = x1r - x3i; - a[j1][i + 1] = x1i + x3r; - a[j3][i] = x1r + x3i; - a[j3][i + 1] = x1i - x3r; - } - } - if (m < n) { - wk1r = w[2]; - for (j = m; j <= l + m - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - a[j2][i] = x2i - x0i; - a[j2][i + 1] = x0r - x2r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j1][i] = wk1r * (x0r - x0i); - a[j1][i + 1] = wk1r * (x0r + x0i); - x0r = x3i + x1r; - x0i = x3r - x1i; - a[j3][i] = wk1r * (x0i - x0r); - a[j3][i + 1] = wk1r * (x0i + x0r); - } - } - k1 = 1; - ks = -1; - for (k = (m << 1); k <= n - m; k += m) { - k1++; - ks = -ks; - wk1r = w[k1 << 1]; - wk1i = w[(k1 << 1) + 1]; - wk2r = ks * w[k1]; - wk2i = w[k1 + ks]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - for (j = k; j <= l + k - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j2][i] = wk2r * x0r - wk2i * x0i; - a[j2][i + 1] = wk2r * x0i + wk2i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j1][i] = wk1r * x0r - wk1i * x0i; - a[j1][i + 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3][i] = wk3r * x0r - wk3i * x0i; - a[j3][i + 1] = wk3r * x0i + wk3i * x0r; - } - } - } - } - l = m; - } - if (l < n) { - for (j = 0; j <= l - 1; j++) { - j1 = j + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] - a[j1][i]; - x0i = a[j][i + 1] - a[j1][i + 1]; - a[j][i] += a[j1][i]; - a[j][i + 1] += a[j1][i + 1]; - a[j1][i] = x0r; - a[j1][i + 1] = x0i; - } - } - } -} - - -void cftfcol(int n1, int n, double **a, double *w) -{ - int i, j, j1, j2, j3, k, k1, ks, l, m; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - for (i = 0; i <= n1 - 1; i++) { - l = 2; - while ((l << 1) < n) { - m = l << 2; - for (j = 0; j <= l - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - a[i][j2] = x0r - x2r; - a[i][j2 + 1] = x0i - x2i; - a[i][j1] = x1r + x3i; - a[i][j1 + 1] = x1i - x3r; - a[i][j3] = x1r - x3i; - a[i][j3 + 1] = x1i + x3r; - } - if (m < n) { - wk1r = w[2]; - for (j = m; j <= l + m - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - a[i][j2] = x0i - x2i; - a[i][j2 + 1] = x2r - x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[i][j1] = wk1r * (x0i + x0r); - a[i][j1 + 1] = wk1r * (x0i - x0r); - x0r = x3i - x1r; - x0i = x3r + x1i; - a[i][j3] = wk1r * (x0r + x0i); - a[i][j3 + 1] = wk1r * (x0r - x0i); - } - k1 = 1; - ks = -1; - for (k = (m << 1); k <= n - m; k += m) { - k1++; - ks = -ks; - wk1r = w[k1 << 1]; - wk1i = w[(k1 << 1) + 1]; - wk2r = ks * w[k1]; - wk2i = w[k1 + ks]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - for (j = k; j <= l + k - 2; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[i][j] + a[i][j1]; - x0i = a[i][j + 1] + a[i][j1 + 1]; - x1r = a[i][j] - a[i][j1]; - x1i = a[i][j + 1] - a[i][j1 + 1]; - x2r = a[i][j2] + a[i][j3]; - x2i = a[i][j2 + 1] + a[i][j3 + 1]; - x3r = a[i][j2] - a[i][j3]; - x3i = a[i][j2 + 1] - a[i][j3 + 1]; - a[i][j] = x0r + x2r; - a[i][j + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[i][j2] = wk2r * x0r + wk2i * x0i; - a[i][j2 + 1] = wk2r * x0i - wk2i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[i][j1] = wk1r * x0r + wk1i * x0i; - a[i][j1 + 1] = wk1r * x0i - wk1i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[i][j3] = wk3r * x0r + wk3i * x0i; - a[i][j3 + 1] = wk3r * x0i - wk3i * x0r; - } - } - } - l = m; - } - if (l < n) { - for (j = 0; j <= l - 2; j += 2) { - j1 = j + l; - x0r = a[i][j] - a[i][j1]; - x0i = a[i][j + 1] - a[i][j1 + 1]; - a[i][j] += a[i][j1]; - a[i][j + 1] += a[i][j1 + 1]; - a[i][j1] = x0r; - a[i][j1 + 1] = x0i; - } - } - } -} - - -void cftfrow(int n, int n2, double **a, double *w) -{ - int i, j, j1, j2, j3, k, k1, ks, l, m; - double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - l = 1; - while ((l << 1) < n) { - m = l << 2; - for (j = 0; j <= l - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - a[j2][i] = x0r - x2r; - a[j2][i + 1] = x0i - x2i; - a[j1][i] = x1r + x3i; - a[j1][i + 1] = x1i - x3r; - a[j3][i] = x1r - x3i; - a[j3][i + 1] = x1i + x3r; - } - } - if (m < n) { - wk1r = w[2]; - for (j = m; j <= l + m - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - a[j2][i] = x0i - x2i; - a[j2][i + 1] = x2r - x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j1][i] = wk1r * (x0i + x0r); - a[j1][i + 1] = wk1r * (x0i - x0r); - x0r = x3i - x1r; - x0i = x3r + x1i; - a[j3][i] = wk1r * (x0r + x0i); - a[j3][i + 1] = wk1r * (x0r - x0i); - } - } - k1 = 1; - ks = -1; - for (k = (m << 1); k <= n - m; k += m) { - k1++; - ks = -ks; - wk1r = w[k1 << 1]; - wk1i = w[(k1 << 1) + 1]; - wk2r = ks * w[k1]; - wk2i = w[k1 + ks]; - wk3r = wk1r - 2 * wk2i * wk1i; - wk3i = 2 * wk2i * wk1r - wk1i; - for (j = k; j <= l + k - 1; j++) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] + a[j1][i]; - x0i = a[j][i + 1] + a[j1][i + 1]; - x1r = a[j][i] - a[j1][i]; - x1i = a[j][i + 1] - a[j1][i + 1]; - x2r = a[j2][i] + a[j3][i]; - x2i = a[j2][i + 1] + a[j3][i + 1]; - x3r = a[j2][i] - a[j3][i]; - x3i = a[j2][i + 1] - a[j3][i + 1]; - a[j][i] = x0r + x2r; - a[j][i + 1] = x0i + x2i; - x0r -= x2r; - x0i -= x2i; - a[j2][i] = wk2r * x0r + wk2i * x0i; - a[j2][i + 1] = wk2r * x0i - wk2i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j1][i] = wk1r * x0r + wk1i * x0i; - a[j1][i + 1] = wk1r * x0i - wk1i * x0r; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j3][i] = wk3r * x0r + wk3i * x0i; - a[j3][i + 1] = wk3r * x0i - wk3i * x0r; - } - } - } - } - l = m; - } - if (l < n) { - for (j = 0; j <= l - 1; j++) { - j1 = j + l; - for (i = 0; i <= n2 - 2; i += 2) { - x0r = a[j][i] - a[j1][i]; - x0i = a[j][i + 1] - a[j1][i + 1]; - a[j][i] += a[j1][i]; - a[j][i + 1] += a[j1][i + 1]; - a[j1][i] = x0r; - a[j1][i + 1] = x0i; - } - } - } -} - - -void rftbcol(int n1, int n, double **a, int nc, double *c) -{ - int i, j, k, kk, ks; - double wkr, wki, xr, xi, yr, yi; - - ks = (nc << 2) / n; - for (i = 0; i <= n1 - 1; i++) { - kk = 0; - for (k = (n >> 1) - 2; k >= 2; k -= 2) { - j = n - k; - kk += ks; - wkr = 0.5 - c[kk]; - wki = c[nc - kk]; - xr = a[i][k] - a[i][j]; - xi = a[i][k + 1] + a[i][j + 1]; - yr = wkr * xr - wki * xi; - yi = wkr * xi + wki * xr; - a[i][k] -= yr; - a[i][k + 1] -= yi; - a[i][j] += yr; - a[i][j + 1] -= yi; - } - } -} - - -void rftfcol(int n1, int n, double **a, int nc, double *c) -{ - int i, j, k, kk, ks; - double wkr, wki, xr, xi, yr, yi; - - ks = (nc << 2) / n; - for (i = 0; i <= n1 - 1; i++) { - kk = 0; - for (k = (n >> 1) - 2; k >= 2; k -= 2) { - j = n - k; - kk += ks; - wkr = 0.5 - c[kk]; - wki = c[nc - kk]; - xr = a[i][k] - a[i][j]; - xi = a[i][k + 1] + a[i][j + 1]; - yr = wkr * xr + wki * xi; - yi = wkr * xi - wki * xr; - a[i][k] -= yr; - a[i][k + 1] -= yi; - a[i][j] += yr; - a[i][j + 1] -= yi; - } - } -} - - -void dctbsub(int n1, int n2, double **a, int nc, double *c) -{ - int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; - double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; - - ks1 = nc / n1; - ks2 = nc / n2; - n1h = n1 >> 1; - kk1 = ks1; - for (k1 = 1; k1 <= n1h - 1; k1++) { - j1 = n1 - k1; - w1r = 2 * c[kk1]; - w1i = 2 * c[nc - kk1]; - kk1 += ks1; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - x0r = w1r * c[kk2]; - x0i = w1i * c[kk2]; - x1r = w1r * c[nc - kk2]; - x1i = w1i * c[nc - kk2]; - wkr = x0r - x1i; - wki = x0i + x1r; - wji = x0r + x1i; - wjr = x0i - x1r; - kk2 += ks2; - x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; - x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; - x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; - x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; - a[k1][k2] = x0r + x1i; - a[k1][k2 + 1] = x0i - x1r; - a[j1][k2] = x1r + x0i; - a[j1][k2 + 1] = x1i - x0r; - } - wkr = w1r * 0.5; - wki = w1i * 0.5; - wjr = w1r * c[kk2]; - wji = w1i * c[kk2]; - x0r = a[k1][0] + a[j1][0]; - x0i = a[k1][1] - a[j1][1]; - x1r = a[k1][0] - a[j1][0]; - x1i = a[k1][1] + a[j1][1]; - a[k1][0] = wkr * x0r - wki * x0i; - a[k1][1] = wkr * x0i + wki * x0r; - a[j1][0] = -wjr * x1r + wji * x1i; - a[j1][1] = wjr * x1i + wji * x1r; - } - w1r = 2 * c[kk1]; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - wkr = 2 * c[kk2]; - wki = 2 * c[nc - kk2]; - wjr = w1r * wkr; - wji = w1r * wki; - kk2 += ks2; - x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; - a[0][k2] = wkr * a[0][k2] - wki * a[0][k2 + 1]; - a[0][k2 + 1] = x0i; - x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; - a[n1h][k2] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; - a[n1h][k2 + 1] = x0i; - } - a[0][1] *= w1r; - a[n1h][0] *= w1r; - a[n1h][1] *= 0.5; -} - - -void dctfsub(int n1, int n2, double **a, int nc, double *c) -{ - int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; - double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; - - ks1 = nc / n1; - ks2 = nc / n2; - n1h = n1 >> 1; - kk1 = ks1; - for (k1 = 1; k1 <= n1h - 1; k1++) { - j1 = n1 - k1; - w1r = 2 * c[kk1]; - w1i = 2 * c[nc - kk1]; - kk1 += ks1; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - x0r = w1r * c[kk2]; - x0i = w1i * c[kk2]; - x1r = w1r * c[nc - kk2]; - x1i = w1i * c[nc - kk2]; - wkr = x0r - x1i; - wki = x0i + x1r; - wji = x0r + x1i; - wjr = x0i - x1r; - kk2 += ks2; - x0r = a[k1][k2] - a[j1][k2 + 1]; - x0i = a[j1][k2] + a[k1][k2 + 1]; - x1r = a[j1][k2] - a[k1][k2 + 1]; - x1i = a[k1][k2] + a[j1][k2 + 1]; - a[k1][k2] = wkr * x0r + wki * x0i; - a[k1][k2 + 1] = wkr * x0i - wki * x0r; - a[j1][k2] = wjr * x1r + wji * x1i; - a[j1][k2 + 1] = wjr * x1i - wji * x1r; - } - x0r = 2 * c[kk2]; - wjr = x0r * w1r; - wji = x0r * w1i; - x0r = w1r * a[k1][0] + w1i * a[k1][1]; - x0i = w1r * a[k1][1] - w1i * a[k1][0]; - x1r = -wjr * a[j1][0] + wji * a[j1][1]; - x1i = wjr * a[j1][1] + wji * a[j1][0]; - a[k1][0] = x0r + x1r; - a[k1][1] = x1i + x0i; - a[j1][0] = x0r - x1r; - a[j1][1] = x1i - x0i; - } - w1r = 2 * c[kk1]; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - wkr = 2 * c[kk2]; - wki = 2 * c[nc - kk2]; - wjr = w1r * wkr; - wji = w1r * wki; - kk2 += ks2; - x0i = wkr * a[0][k2 + 1] - wki * a[0][k2]; - a[0][k2] = wkr * a[0][k2] + wki * a[0][k2 + 1]; - a[0][k2 + 1] = x0i; - x0i = wjr * a[n1h][k2 + 1] - wji * a[n1h][k2]; - a[n1h][k2] = wjr * a[n1h][k2] + wji * a[n1h][k2 + 1]; - a[n1h][k2 + 1] = x0i; - } - w1r *= 2; - a[0][0] *= 2; - a[0][1] *= w1r; - a[n1h][0] *= w1r; -} - - -void dstbsub(int n1, int n2, double **a, int nc, double *c) -{ - int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; - double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; - - ks1 = nc / n1; - ks2 = nc / n2; - n1h = n1 >> 1; - kk1 = ks1; - for (k1 = 1; k1 <= n1h - 1; k1++) { - j1 = n1 - k1; - w1r = 2 * c[kk1]; - w1i = 2 * c[nc - kk1]; - kk1 += ks1; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - x0r = w1r * c[kk2]; - x0i = w1i * c[kk2]; - x1r = w1r * c[nc - kk2]; - x1i = w1i * c[nc - kk2]; - wkr = x0r - x1i; - wki = x0i + x1r; - wji = x0r + x1i; - wjr = x0i - x1r; - kk2 += ks2; - x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; - x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; - x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; - x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; - a[k1][k2] = x1i - x0r; - a[k1][k2 + 1] = x1r + x0i; - a[j1][k2] = x0i - x1r; - a[j1][k2 + 1] = x0r + x1i; - } - wkr = w1r * 0.5; - wki = w1i * 0.5; - wjr = w1r * c[kk2]; - wji = w1i * c[kk2]; - x0r = a[k1][0] + a[j1][0]; - x0i = a[k1][1] - a[j1][1]; - x1r = a[k1][0] - a[j1][0]; - x1i = a[k1][1] + a[j1][1]; - a[k1][1] = wkr * x0r - wki * x0i; - a[k1][0] = wkr * x0i + wki * x0r; - a[j1][1] = -wjr * x1r + wji * x1i; - a[j1][0] = wjr * x1i + wji * x1r; - } - w1r = 2 * c[kk1]; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - wkr = 2 * c[kk2]; - wki = 2 * c[nc - kk2]; - wjr = w1r * wkr; - wji = w1r * wki; - kk2 += ks2; - x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; - a[0][k2 + 1] = wkr * a[0][k2] - wki * a[0][k2 + 1]; - a[0][k2] = x0i; - x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; - a[n1h][k2 + 1] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; - a[n1h][k2] = x0i; - } - a[0][1] *= w1r; - a[n1h][0] *= w1r; - a[n1h][1] *= 0.5; -} - - -void dstfsub(int n1, int n2, double **a, int nc, double *c) -{ - int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; - double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; - - ks1 = nc / n1; - ks2 = nc / n2; - n1h = n1 >> 1; - kk1 = ks1; - for (k1 = 1; k1 <= n1h - 1; k1++) { - j1 = n1 - k1; - w1r = 2 * c[kk1]; - w1i = 2 * c[nc - kk1]; - kk1 += ks1; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - x0r = w1r * c[kk2]; - x0i = w1i * c[kk2]; - x1r = w1r * c[nc - kk2]; - x1i = w1i * c[nc - kk2]; - wkr = x0r - x1i; - wki = x0i + x1r; - wji = x0r + x1i; - wjr = x0i - x1r; - kk2 += ks2; - x0r = a[j1][k2 + 1] - a[k1][k2]; - x0i = a[k1][k2 + 1] + a[j1][k2]; - x1r = a[k1][k2 + 1] - a[j1][k2]; - x1i = a[j1][k2 + 1] + a[k1][k2]; - a[k1][k2] = wkr * x0r + wki * x0i; - a[k1][k2 + 1] = wkr * x0i - wki * x0r; - a[j1][k2] = wjr * x1r + wji * x1i; - a[j1][k2 + 1] = wjr * x1i - wji * x1r; - } - x0r = 2 * c[kk2]; - wjr = x0r * w1r; - wji = x0r * w1i; - x0r = w1r * a[k1][1] + w1i * a[k1][0]; - x0i = w1r * a[k1][0] - w1i * a[k1][1]; - x1r = -wjr * a[j1][1] + wji * a[j1][0]; - x1i = wjr * a[j1][0] + wji * a[j1][1]; - a[k1][0] = x0r + x1r; - a[k1][1] = x1i + x0i; - a[j1][0] = x0r - x1r; - a[j1][1] = x1i - x0i; - } - w1r = 2 * c[kk1]; - kk2 = ks2; - for (k2 = 2; k2 <= n2 - 2; k2 += 2) { - wkr = 2 * c[kk2]; - wki = 2 * c[nc - kk2]; - wjr = w1r * wkr; - wji = w1r * wki; - kk2 += ks2; - x0i = wkr * a[0][k2] - wki * a[0][k2 + 1]; - a[0][k2] = wkr * a[0][k2 + 1] + wki * a[0][k2]; - a[0][k2 + 1] = x0i; - x0i = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; - a[n1h][k2] = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; - a[n1h][k2 + 1] = x0i; - } - w1r *= 2; - a[0][0] *= 2; - a[0][1] *= w1r; - a[n1h][0] *= w1r; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/fft4f2d.f b/third_party/tensorflow_dependencies/fft2d/fft4f2d.f deleted file mode 100644 index af529e3..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fft4f2d.f +++ /dev/null
@@ -1,1591 +0,0 @@ -! Fast Fourier/Cosine/Sine Transform -! dimension :two -! data length :power of 2 -! decimation :frequency -! radix :4, 2, row-column -! data :inplace -! table :use -! subroutines -! cdft2d: Complex Discrete Fourier Transform -! rdft2d: Real Discrete Fourier Transform -! ddct2d: Discrete Cosine Transform -! ddst2d: Discrete Sine Transform -! -! -! -------- Complex DFT (Discrete Fourier Transform) -------- -! [definition] -! <case1> -! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * -! exp(2*pi*i*j1*k1/n1) * -! exp(2*pi*i*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> -! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * -! exp(-2*pi*i*j1*k1/n1) * -! exp(-2*pi*i*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! 2*n1 :data length (integer) -! n1 >= 1, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 1, n2 = power of 2 -! a(0:2*n1-1,0:n2-1) -! :input/output data (real*8) -! input data -! a(2*j1,j2) = Re(x(j1,j2)), -! a(2*j1+1,j2) = Im(x(j1,j2)), -! 0<=j1<n1, 0<=j2<n2 -! output data -! a(2*k1,k2) = Re(X(k1,k2)), -! a(2*k1+1,k2) = Im(X(k1,k2)), -! 0<=k1<n1, 0<=k2<n2 -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/2, n2/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) -! is -! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, 2 * n1 - 1 -! a(j1, j2) = a(j1, j2) * (1.0d0 / (n1 * n2)) -! end do -! end do -! . -! -! -! -------- Real DFT / Inverse of Real DFT -------- -! [definition] -! <case1> RDFT -! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! I(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> IRDFT (excluding scale) -! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 -! (R(j1,j2) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + -! I(j1,j2) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), -! 0<=k1<n1, 0<=k2<n2 -! (notes: R(n1-k1,n2-k2) = R(k1,k2), -! I(n1-k1,n2-k2) = -I(k1,k2), -! R(n1-k1,0) = R(k1,0), -! I(n1-k1,0) = -I(k1,0), -! R(0,n2-k2) = R(0,k2), -! I(0,n2-k2) = -I(0,k2), -! 0<k1<n1, 0<k2<n2) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call rdft2d(n1max, n1, n2, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call rdft2d(n1max, n1, n2, -1, a, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! <case1> -! output data -! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), -! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), -! 0<k1<n1/2, 0<k2<n2, -! a(2*k1,0) = R(k1,0) = R(n1-k1,0), -! a(2*k1+1,0) = I(k1,0) = -I(n1-k1,0), -! 0<k1<n1/2, -! a(0,k2) = R(0,k2) = R(0,n2-k2), -! a(1,k2) = I(0,k2) = -I(0,n2-k2), -! a(1,n2-k2) = R(n1/2,k2) = R(n1/2,n2-k2), -! a(0,n2-k2) = -I(n1/2,k2) = I(n1/2,n2-k2), -! 0<k2<n2/2, -! a(0,0) = R(0,0), -! a(1,0) = R(n1/2,0), -! a(0,n2/2) = R(0,n2/2), -! a(1,n2/2) = R(n1/2,n2/2) -! <case2> -! input data -! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), -! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), -! 0<j1<n1/2, 0<j2<n2, -! a(2*j1,0) = R(j1,0) = R(n1-j1,0), -! a(2*j1+1,0) = I(j1,0) = -I(n1-j1,0), -! 0<j1<n1/2, -! a(0,j2) = R(0,j2) = R(0,n2-j2), -! a(1,j2) = I(0,j2) = -I(0,n2-j2), -! a(1,n2-j2) = R(n1/2,j2) = R(n1/2,n2-j2), -! a(0,n2-j2) = -I(n1/2,j2) = I(n1/2,n2-j2), -! 0<j2<n2/2, -! a(0,0) = R(0,0), -! a(1,0) = R(n1/2,0), -! a(0,n2/2) = R(0,n2/2), -! a(1,n2/2) = R(n1/2,n2/2) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/4, n2/2) + n1/4 -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call rdft2d(n1max, n1, n2, 1, a, ip, w) -! is -! call rdft2d(n1max, n1, n2, -1, a, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (2.0d0 / (n1 * n2)) -! end do -! end do -! . -! -! -! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- -! [definition] -! <case1> IDCT (excluding scale) -! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(pi*j1*(k1+1/2)/n1) * -! cos(pi*j2*(k2+1/2)/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> DCT -! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(pi*(j1+1/2)*k1/n1) * -! cos(pi*(j2+1/2)*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! output data -! a(k1,k2) = C(k1,k2), 0<=k1<n1, 0<=k2<n2 -! t(0:n1-1,0:n2-1) -! :work area (real*8) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/4, n2/2) + max(n1, n2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) -! is -! do j1 = 0, n1 - 1 -! a(j1, 0) = a(j1, 0) * 0.5d0 -! end do -! do j2 = 0, n2 - 1 -! a(0, j2) = a(0, j2) * 0.5d0 -! end do -! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) -! end do -! end do -! . -! -! -! -------- DST (Discrete Sine Transform) / Inverse of DST -------- -! [definition] -! <case1> IDST (excluding scale) -! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * -! sin(pi*j1*(k1+1/2)/n1) * -! sin(pi*j2*(k2+1/2)/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> DST -! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! sin(pi*(j1+1/2)*k1/n1) * -! sin(pi*(j2+1/2)*k2/n2), -! 0<k1<=n1, 0<k2<=n2 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! <case1> -! input data -! a(j1,j2) = A(j1,j2), 0<j1<n1, 0<j2<n2, -! a(j1,0) = A(j1,n2), 0<j1<n1, -! a(0,j2) = A(n1,j2), 0<j2<n2, -! a(0,0) = A(n1,n2) -! (i.e. A(j1,j2) = a(mod(j1,n1),mod(j2,n2))) -! output data -! a(k1,k2) = S(k1,k2), 0<=k1<n1, 0<=k2<n2 -! <case2> -! output data -! a(k1,k2) = S(k1,k2), 0<k1<n1, 0<k2<n2, -! a(k1,0) = S(k1,n2), 0<k1<n1, -! a(0,k2) = S(n1,k2), 0<k2<n2, -! a(0,0) = S(n1,n2) -! (i.e. S(k1,k2) = a(mod(k1,n1),mod(k2,n2))) -! t(0:n1-1,0:n2-1) -! :work area (real*8) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/4, n2/2) + max(n1, n2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) -! is -! do j1 = 0, n1 - 1 -! a(j1, 0) = a(j1, 0) * 0.5d0 -! end do -! do j2 = 0, n2 - 1 -! a(0, j2) = a(0, j2) * 0.5d0 -! end do -! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) -! end do -! end do -! . -! -! - subroutine cdft2d(n1max, n1, n2, isgn, a, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n - real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) - n = max(n1, 2 * n2) - if (n .gt. 4 * ip(0)) then - call makewt(n / 4, ip, w) - end if - if (n1 .gt. 4) then - call bitrv2row(n1max, n1, n2, ip(2), a) - end if - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), a) - end if - if (isgn .lt. 0) then - call cftfrow(n1max, n1, n2, a, w) - call cftfcol(n1max, n1, n2, a, w) - else - call cftbrow(n1max, n1, n2, a, w) - call cftbcol(n1max, n1, n2, a, w) - end if - end -! - subroutine rdft2d(n1max, n1, n2, isgn, a, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n2h, i, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi - n = max(n1, 2 * n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n1 .gt. 4 * nc) then - nc = n1 / 4 - call makect(nc, ip, w(nw)) - end if - n2h = n2 / 2 - if (isgn .lt. 0) then - do i = 1, n2h - 1 - j = n2 - i - xi = a(0, i) - a(0, j) - a(0, i) = a(0, i) + a(0, j) - a(0, j) = xi - xi = a(1, j) - a(1, i) - a(1, i) = a(1, i) + a(1, j) - a(1, j) = xi - end do - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), a) - end if - call cftfcol(n1max, n1, n2, a, w) - do i = 0, n2 - 1 - a(1, i) = 0.5d0 * (a(0, i) - a(1, i)) - a(0, i) = a(0, i) - a(1, i) - end do - if (n1 .gt. 4) then - call rftfrow(n1max, n1, n2, a, nc, w(nw)) - call bitrv2row(n1max, n1, n2, ip(2), a) - end if - call cftfrow(n1max, n1, n2, a, w) - else - if (n1 .gt. 4) then - call bitrv2row(n1max, n1, n2, ip(2), a) - end if - call cftbrow(n1max, n1, n2, a, w) - if (n1 .gt. 4) then - call rftbrow(n1max, n1, n2, a, nc, w(nw)) - end if - do i = 0, n2 - 1 - xi = a(0, i) - a(1, i) - a(0, i) = a(0, i) + a(1, i) - a(1, i) = xi - end do - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), a) - end if - call cftbcol(n1max, n1, n2, a, w) - do i = 1, n2h - 1 - j = n2 - i - a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) - a(0, i) = a(0, i) - a(0, j) - a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) - a(1, i) = a(1, i) - a(1, j) - end do - end if - end -! - subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, - & i, ix, ic, j, jx, jc - real*8 a(0 : n1max - 1, 0 : n2 - 1), - & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi - n = max(n1, 2 * n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n1 .gt. nc .or. n2 .gt. nc) then - nc = max(n1, n2) - call makect(nc, ip, w(nw)) - end if - n1h = n1 / 2 - n2h = n2 / 2 - if (isgn .ge. 0) then - do i = 0, n2 - 1 - do j = 1, n1h - 1 - jx = 2 * j - t(jx, i) = a(j, i) - t(jx + 1, i) = a(n1 - j, i) - end do - end do - t(0, 0) = a(0, 0) - t(1, 0) = a(n1h, 0) - t(0, n2h) = a(0, n2h) - t(1, n2h) = a(n1h, n2h) - do i = 1, n2h - 1 - ic = n2 - i - t(0, i) = a(0, i) - t(1, ic) = a(n1h, i) - t(1, i) = a(0, ic) - t(0, ic) = a(n1h, ic) - end do - call dctfsub(n1max, n1, n2, t, nc, w(nw)) - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), t) - end if - call cftfcol(n1max, n1, n2, t, w) - do i = 0, n2 - 1 - t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) - t(0, i) = t(0, i) - t(1, i) - end do - if (n1 .gt. 4) then - call rftfrow(n1max, n1, n2, t, nc, w(nw)) - call bitrv2row(n1max, n1, n2, ip(2), t) - end if - call cftfrow(n1max, n1, n2, t, w) - do i = 0, n2h - 1 - ix = 2 * i - ic = n2 - 1 - i - do j = 0, n1h - 1 - jx = 2 * j - jc = n1 - 1 - j - a(jx, ix) = t(j, i) - a(jx + 1, ix) = t(jc, i) - a(jx, ix + 1) = t(j, ic) - a(jx + 1, ix + 1) = t(jc, ic) - end do - end do - else - do i = 0, n2h - 1 - ix = 2 * i - ic = n2 - 1 - i - do j = 0, n1h - 1 - jx = 2 * j - jc = n1 - 1 - j - t(j, i) = a(jx, ix) - t(jc, i) = a(jx + 1, ix) - t(j, ic) = a(jx, ix + 1) - t(jc, ic) = a(jx + 1, ix + 1) - end do - end do - if (n1 .gt. 4) then - call bitrv2row(n1max, n1, n2, ip(2), t) - end if - call cftbrow(n1max, n1, n2, t, w) - if (n1 .gt. 4) then - call rftbrow(n1max, n1, n2, t, nc, w(nw)) - end if - do i = 0, n2 - 1 - xi = t(0, i) - t(1, i) - t(0, i) = t(0, i) + t(1, i) - t(1, i) = xi - end do - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), t) - end if - call cftbcol(n1max, n1, n2, t, w) - call dctbsub(n1max, n1, n2, t, nc, w(nw)) - do i = 0, n2 - 1 - do j = 1, n1h - 1 - jx = 2 * j - a(j, i) = t(jx, i) - a(n1 - j, i) = t(jx + 1, i) - end do - end do - a(0, 0) = t(0, 0) - a(n1h, 0) = t(1, 0) - a(0, n2h) = t(0, n2h) - a(n1h, n2h) = t(1, n2h) - do i = 1, n2h - 1 - ic = n2 - i - a(0, i) = t(0, i) - a(n1h, i) = t(1, ic) - a(0, ic) = t(1, i) - a(n1h, ic) = t(0, ic) - end do - end if - end -! - subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, - & i, ix, ic, j, jx, jc - real*8 a(0 : n1max - 1, 0 : n2 - 1), - & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi - n = max(n1, 2 * n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n1 .gt. nc .or. n2 .gt. nc) then - nc = max(n1, n2) - call makect(nc, ip, w(nw)) - end if - n1h = n1 / 2 - n2h = n2 / 2 - if (isgn .ge. 0) then - do i = 0, n2 - 1 - do j = 1, n1h - 1 - jx = 2 * j - t(jx, i) = a(j, i) - t(jx + 1, i) = a(n1 - j, i) - end do - end do - t(0, 0) = a(0, 0) - t(1, 0) = a(n1h, 0) - t(0, n2h) = a(0, n2h) - t(1, n2h) = a(n1h, n2h) - do i = 1, n2h - 1 - ic = n2 - i - t(0, i) = a(0, i) - t(1, ic) = a(n1h, i) - t(1, i) = a(0, ic) - t(0, ic) = a(n1h, ic) - end do - call dstfsub(n1max, n1, n2, t, nc, w(nw)) - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), t) - end if - call cftfcol(n1max, n1, n2, t, w) - do i = 0, n2 - 1 - t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) - t(0, i) = t(0, i) - t(1, i) - end do - if (n1 .gt. 4) then - call rftfrow(n1max, n1, n2, t, nc, w(nw)) - call bitrv2row(n1max, n1, n2, ip(2), t) - end if - call cftfrow(n1max, n1, n2, t, w) - do i = 0, n2h - 1 - ix = 2 * i - ic = n2 - 1 - i - do j = 0, n1h - 1 - jx = 2 * j - jc = n1 - 1 - j - a(jx, ix) = t(j, i) - a(jx + 1, ix) = -t(jc, i) - a(jx, ix + 1) = -t(j, ic) - a(jx + 1, ix + 1) = t(jc, ic) - end do - end do - else - do i = 0, n2h - 1 - ix = 2 * i - ic = n2 - 1 - i - do j = 0, n1h - 1 - jx = 2 * j - jc = n1 - 1 - j - t(j, i) = a(jx, ix) - t(jc, i) = -a(jx + 1, ix) - t(j, ic) = -a(jx, ix + 1) - t(jc, ic) = a(jx + 1, ix + 1) - end do - end do - if (n1 .gt. 4) then - call bitrv2row(n1max, n1, n2, ip(2), t) - end if - call cftbrow(n1max, n1, n2, t, w) - if (n1 .gt. 4) then - call rftbrow(n1max, n1, n2, t, nc, w(nw)) - end if - do i = 0, n2 - 1 - xi = t(0, i) - t(1, i) - t(0, i) = t(0, i) + t(1, i) - t(1, i) = xi - end do - if (n2 .gt. 2) then - call bitrv2col(n1max, n1, n2, ip(2), t) - end if - call cftbcol(n1max, n1, n2, t, w) - call dstbsub(n1max, n1, n2, t, nc, w(nw)) - do i = 0, n2 - 1 - do j = 1, n1h - 1 - jx = 2 * j - a(j, i) = t(jx, i) - a(n1 - j, i) = t(jx + 1, i) - end do - end do - a(0, 0) = t(0, 0) - a(n1h, 0) = t(1, 0) - a(0, n2h) = t(0, n2h) - a(n1h, n2h) = t(1, n2h) - do i = 1, n2h - 1 - ic = n2 - i - a(0, i) = t(0, i) - a(n1h, i) = t(1, ic) - a(0, ic) = t(1, i) - a(n1h, ic) = t(0, ic) - end do - end if - end -! -! -------- initializing routines -------- -! - subroutine makewt(nw, ip, w) - integer nw, ip(0 : *), nwh, j - real*8 w(0 : nw - 1), delta, x, y - ip(0) = nw - ip(1) = 1 - if (nw .gt. 2) then - nwh = nw / 2 - delta = atan(1.0d0) / nwh - w(0) = 1 - w(1) = 0 - w(nwh) = cos(delta * nwh) - w(nwh + 1) = w(nwh) - do j = 2, nwh - 2, 2 - x = cos(delta * j) - y = sin(delta * j) - w(j) = x - w(j + 1) = y - w(nw - j) = y - w(nw - j + 1) = x - end do - call bitrv2(nw, ip(2), w) - end if - end -! - subroutine makect(nc, ip, c) - integer nc, ip(0 : *), nch, j - real*8 c(0 : nc - 1), delta - ip(1) = nc - if (nc .gt. 1) then - nch = nc / 2 - delta = atan(1.0d0) / nch - c(0) = 0.5d0 - c(nch) = 0.5d0 * cos(delta * nch) - do j = 1, nch - 1 - c(j) = 0.5d0 * cos(delta * j) - c(nc - j) = 0.5d0 * sin(delta * j) - end do - end if - end -! -! -------- child routines -------- -! - subroutine bitrv2(n, ip, a) - integer n, ip(0 : *), j, j1, k, k1, l, m, m2 - real*8 a(0 : n - 1), xr, xi - ip(0) = 0 - l = n - m = 1 - do while (4 * m .lt. l) - l = l / 2 - do j = 0, m - 1 - ip(m + j) = ip(j) + l - end do - m = m * 2 - end do - if (4 * m .gt. l) then - do k = 1, m - 1 - do j = 0, k - 1 - j1 = 2 * j + ip(k) - k1 = 2 * k + ip(j) - xr = a(j1) - xi = a(j1 + 1) - a(j1) = a(k1) - a(j1 + 1) = a(k1 + 1) - a(k1) = xr - a(k1 + 1) = xi - end do - end do - else - m2 = 2 * m - do k = 1, m - 1 - do j = 0, k - 1 - j1 = 2 * j + ip(k) - k1 = 2 * k + ip(j) - xr = a(j1) - xi = a(j1 + 1) - a(j1) = a(k1) - a(j1 + 1) = a(k1 + 1) - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + m2 - k1 = k1 + m2 - xr = a(j1) - xi = a(j1 + 1) - a(j1) = a(k1) - a(j1 + 1) = a(k1 + 1) - a(k1) = xr - a(k1 + 1) = xi - end do - end do - end if - end -! - subroutine bitrv2row(n1max, n, n2, ip, a) - integer n1max, n, n2, ip(0 : *), i, j, j1, k, k1, l, m, m2 - real*8 a(0 : n1max - 1, 0 : n2 - 1), xr, xi - ip(0) = 0 - l = n - m = 1 - do while (4 * m .lt. l) - l = l / 2 - do j = 0, m - 1 - ip(m + j) = ip(j) + l - end do - m = m * 2 - end do - if (4 * m .gt. l) then - do i = 0, n2 - 1 - do k = 1, m - 1 - do j = 0, k - 1 - j1 = 2 * j + ip(k) - k1 = 2 * k + ip(j) - xr = a(j1, i) - xi = a(j1 + 1, i) - a(j1, i) = a(k1, i) - a(j1 + 1, i) = a(k1 + 1, i) - a(k1, i) = xr - a(k1 + 1, i) = xi - end do - end do - end do - else - m2 = 2 * m - do i = 0, n2 - 1 - do k = 1, m - 1 - do j = 0, k - 1 - j1 = 2 * j + ip(k) - k1 = 2 * k + ip(j) - xr = a(j1, i) - xi = a(j1 + 1, i) - a(j1, i) = a(k1, i) - a(j1 + 1, i) = a(k1 + 1, i) - a(k1, i) = xr - a(k1 + 1, i) = xi - j1 = j1 + m2 - k1 = k1 + m2 - xr = a(j1, i) - xi = a(j1 + 1, i) - a(j1, i) = a(k1, i) - a(j1 + 1, i) = a(k1 + 1, i) - a(k1, i) = xr - a(k1 + 1, i) = xi - end do - end do - end do - end if - end -! - subroutine bitrv2col(n1max, n1, n, ip, a) - integer n1max, n1, n, ip(0 : *), i, j, j1, k, k1, l, m - real*8 a(0 : n1max - 1, 0 : n - 1), xr, xi - ip(0) = 0 - l = n - m = 1 - do while (2 * m .lt. l) - l = l / 2 - do j = 0, m - 1 - ip(m + j) = ip(j) + l - end do - m = m * 2 - end do - if (2 * m .gt. l) then - do k = 1, m - 1 - do j = 0, k - 1 - j1 = j + ip(k) - k1 = k + ip(j) - do i = 0, n1 - 2, 2 - xr = a(i, j1) - xi = a(i + 1, j1) - a(i, j1) = a(i, k1) - a(i + 1, j1) = a(i + 1, k1) - a(i, k1) = xr - a(i + 1, k1) = xi - end do - end do - end do - else - do k = 1, m - 1 - do j = 0, k - 1 - j1 = j + ip(k) - k1 = k + ip(j) - do i = 0, n1 - 2, 2 - xr = a(i, j1) - xi = a(i + 1, j1) - a(i, j1) = a(i, k1) - a(i + 1, j1) = a(i + 1, k1) - a(i, k1) = xr - a(i + 1, k1) = xi - end do - j1 = j1 + m - k1 = k1 + m - do i = 0, n1 - 2, 2 - xr = a(i, j1) - xi = a(i + 1, j1) - a(i, j1) = a(i, k1) - a(i + 1, j1) = a(i + 1, k1) - a(i, k1) = xr - a(i + 1, k1) = xi - end do - end do - end do - end if - end -! - subroutine cftbrow(n1max, n, n2, a, w) - integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m - real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) - real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - do i = 0, n2 - 1 - l = 2 - do while (2 * l .lt. n) - m = 4 * l - do j = 0, l - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - a(j2, i) = x0r - x2r - a(j2 + 1, i) = x0i - x2i - a(j1, i) = x1r - x3i - a(j1 + 1, i) = x1i + x3r - a(j3, i) = x1r + x3i - a(j3 + 1, i) = x1i - x3r - end do - if (m .lt. n) then - wk1r = w(2) - do j = m, l + m - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - a(j2, i) = x2i - x0i - a(j2 + 1, i) = x0r - x2r - x0r = x1r - x3i - x0i = x1i + x3r - a(j1, i) = wk1r * (x0r - x0i) - a(j1 + 1, i) = wk1r * (x0r + x0i) - x0r = x3i + x1r - x0i = x3r - x1i - a(j3, i) = wk1r * (x0i - x0r) - a(j3 + 1, i) = wk1r * (x0i + x0r) - end do - k1 = 1 - ks = -1 - do k = 2 * m, n - m, m - k1 = k1 + 1 - ks = -ks - wk1r = w(2 * k1) - wk1i = w(2 * k1 + 1) - wk2r = ks * w(k1) - wk2i = w(k1 + ks) - wk3r = wk1r - 2 * wk2i * wk1i - wk3i = 2 * wk2i * wk1r - wk1i - do j = k, l + k - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - x0r = x0r - x2r - x0i = x0i - x2i - a(j2, i) = wk2r * x0r - wk2i * x0i - a(j2 + 1, i) = wk2r * x0i + wk2i * x0r - x0r = x1r - x3i - x0i = x1i + x3r - a(j1, i) = wk1r * x0r - wk1i * x0i - a(j1 + 1, i) = wk1r * x0i + wk1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3, i) = wk3r * x0r - wk3i * x0i - a(j3 + 1, i) = wk3r * x0i + wk3i * x0r - end do - end do - end if - l = m - end do - if (l .lt. n) then - do j = 0, l - 2, 2 - j1 = j + l - x0r = a(j, i) - a(j1, i) - x0i = a(j + 1, i) - a(j1 + 1, i) - a(j, i) = a(j, i) + a(j1, i) - a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) - a(j1, i) = x0r - a(j1 + 1, i) = x0i - end do - end if - end do - end -! - subroutine cftbcol(n1max, n1, n, a, w) - integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m - real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) - real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - l = 1 - do while (2 * l .lt. n) - m = 4 * l - do j = 0, l - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - a(i, j2) = x0r - x2r - a(i + 1, j2) = x0i - x2i - a(i, j1) = x1r - x3i - a(i + 1, j1) = x1i + x3r - a(i, j3) = x1r + x3i - a(i + 1, j3) = x1i - x3r - end do - end do - if (m .lt. n) then - wk1r = w(2) - do j = m, l + m - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - a(i, j2) = x2i - x0i - a(i + 1, j2) = x0r - x2r - x0r = x1r - x3i - x0i = x1i + x3r - a(i, j1) = wk1r * (x0r - x0i) - a(i + 1, j1) = wk1r * (x0r + x0i) - x0r = x3i + x1r - x0i = x3r - x1i - a(i, j3) = wk1r * (x0i - x0r) - a(i + 1, j3) = wk1r * (x0i + x0r) - end do - end do - k1 = 1 - ks = -1 - do k = 2 * m, n - m, m - k1 = k1 + 1 - ks = -ks - wk1r = w(2 * k1) - wk1i = w(2 * k1 + 1) - wk2r = ks * w(k1) - wk2i = w(k1 + ks) - wk3r = wk1r - 2 * wk2i * wk1i - wk3i = 2 * wk2i * wk1r - wk1i - do j = k, l + k - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - x0r = x0r - x2r - x0i = x0i - x2i - a(i, j2) = wk2r * x0r - wk2i * x0i - a(i + 1, j2) = wk2r * x0i + wk2i * x0r - x0r = x1r - x3i - x0i = x1i + x3r - a(i, j1) = wk1r * x0r - wk1i * x0i - a(i + 1, j1) = wk1r * x0i + wk1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(i, j3) = wk3r * x0r - wk3i * x0i - a(i + 1, j3) = wk3r * x0i + wk3i * x0r - end do - end do - end do - end if - l = m - end do - if (l .lt. n) then - do j = 0, l - 1 - j1 = j + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) - a(i, j1) - x0i = a(i + 1, j) - a(i + 1, j1) - a(i, j) = a(i, j) + a(i, j1) - a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) - a(i, j1) = x0r - a(i + 1, j1) = x0i - end do - end do - end if - end -! - subroutine cftfrow(n1max, n, n2, a, w) - integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m - real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) - real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - do i = 0, n2 - 1 - l = 2 - do while (2 * l .lt. n) - m = 4 * l - do j = 0, l - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - a(j2, i) = x0r - x2r - a(j2 + 1, i) = x0i - x2i - a(j1, i) = x1r + x3i - a(j1 + 1, i) = x1i - x3r - a(j3, i) = x1r - x3i - a(j3 + 1, i) = x1i + x3r - end do - if (m .lt. n) then - wk1r = w(2) - do j = m, l + m - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - a(j2, i) = x0i - x2i - a(j2 + 1, i) = x2r - x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j1, i) = wk1r * (x0i + x0r) - a(j1 + 1, i) = wk1r * (x0i - x0r) - x0r = x3i - x1r - x0i = x3r + x1i - a(j3, i) = wk1r * (x0r + x0i) - a(j3 + 1, i) = wk1r * (x0r - x0i) - end do - k1 = 1 - ks = -1 - do k = 2 * m, n - m, m - k1 = k1 + 1 - ks = -ks - wk1r = w(2 * k1) - wk1i = w(2 * k1 + 1) - wk2r = ks * w(k1) - wk2i = w(k1 + ks) - wk3r = wk1r - 2 * wk2i * wk1i - wk3i = 2 * wk2i * wk1r - wk1i - do j = k, l + k - 2, 2 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - x0r = a(j, i) + a(j1, i) - x0i = a(j + 1, i) + a(j1 + 1, i) - x1r = a(j, i) - a(j1, i) - x1i = a(j + 1, i) - a(j1 + 1, i) - x2r = a(j2, i) + a(j3, i) - x2i = a(j2 + 1, i) + a(j3 + 1, i) - x3r = a(j2, i) - a(j3, i) - x3i = a(j2 + 1, i) - a(j3 + 1, i) - a(j, i) = x0r + x2r - a(j + 1, i) = x0i + x2i - x0r = x0r - x2r - x0i = x0i - x2i - a(j2, i) = wk2r * x0r + wk2i * x0i - a(j2 + 1, i) = wk2r * x0i - wk2i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j1, i) = wk1r * x0r + wk1i * x0i - a(j1 + 1, i) = wk1r * x0i - wk1i * x0r - x0r = x1r - x3i - x0i = x1i + x3r - a(j3, i) = wk3r * x0r + wk3i * x0i - a(j3 + 1, i) = wk3r * x0i - wk3i * x0r - end do - end do - end if - l = m - end do - if (l .lt. n) then - do j = 0, l - 2, 2 - j1 = j + l - x0r = a(j, i) - a(j1, i) - x0i = a(j + 1, i) - a(j1 + 1, i) - a(j, i) = a(j, i) + a(j1, i) - a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) - a(j1, i) = x0r - a(j1 + 1, i) = x0i - end do - end if - end do - end -! - subroutine cftfcol(n1max, n1, n, a, w) - integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m - real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) - real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - l = 1 - do while (2 * l .lt. n) - m = 4 * l - do j = 0, l - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - a(i, j2) = x0r - x2r - a(i + 1, j2) = x0i - x2i - a(i, j1) = x1r + x3i - a(i + 1, j1) = x1i - x3r - a(i, j3) = x1r - x3i - a(i + 1, j3) = x1i + x3r - end do - end do - if (m .lt. n) then - wk1r = w(2) - do j = m, l + m - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - a(i, j2) = x0i - x2i - a(i + 1, j2) = x2r - x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(i, j1) = wk1r * (x0i + x0r) - a(i + 1, j1) = wk1r * (x0i - x0r) - x0r = x3i - x1r - x0i = x3r + x1i - a(i, j3) = wk1r * (x0r + x0i) - a(i + 1, j3) = wk1r * (x0r - x0i) - end do - end do - k1 = 1 - ks = -1 - do k = 2 * m, n - m, m - k1 = k1 + 1 - ks = -ks - wk1r = w(2 * k1) - wk1i = w(2 * k1 + 1) - wk2r = ks * w(k1) - wk2i = w(k1 + ks) - wk3r = wk1r - 2 * wk2i * wk1i - wk3i = 2 * wk2i * wk1r - wk1i - do j = k, l + k - 1 - j1 = j + l - j2 = j1 + l - j3 = j2 + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) + a(i, j1) - x0i = a(i + 1, j) + a(i + 1, j1) - x1r = a(i, j) - a(i, j1) - x1i = a(i + 1, j) - a(i + 1, j1) - x2r = a(i, j2) + a(i, j3) - x2i = a(i + 1, j2) + a(i + 1, j3) - x3r = a(i, j2) - a(i, j3) - x3i = a(i + 1, j2) - a(i + 1, j3) - a(i, j) = x0r + x2r - a(i + 1, j) = x0i + x2i - x0r = x0r - x2r - x0i = x0i - x2i - a(i, j2) = wk2r * x0r + wk2i * x0i - a(i + 1, j2) = wk2r * x0i - wk2i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(i, j1) = wk1r * x0r + wk1i * x0i - a(i + 1, j1) = wk1r * x0i - wk1i * x0r - x0r = x1r - x3i - x0i = x1i + x3r - a(i, j3) = wk3r * x0r + wk3i * x0i - a(i + 1, j3) = wk3r * x0i - wk3i * x0r - end do - end do - end do - end if - l = m - end do - if (l .lt. n) then - do j = 0, l - 1 - j1 = j + l - do i = 0, n1 - 2, 2 - x0r = a(i, j) - a(i, j1) - x0i = a(i + 1, j) - a(i + 1, j1) - a(i, j) = a(i, j) + a(i, j1) - a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) - a(i, j1) = x0r - a(i + 1, j1) = x0i - end do - end do - end if - end -! - subroutine rftbrow(n1max, n, n2, a, nc, c) - integer n1max, n, n2, nc, i, j, k, kk, ks - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & wkr, wki, xr, xi, yr, yi - ks = 4 * nc / n - do i = 0, n2 - 1 - kk = 0 - do k = n / 2 - 2, 2, -2 - j = n - k - kk = kk + ks - wkr = 0.5d0 - c(kk) - wki = c(nc - kk) - xr = a(k, i) - a(j, i) - xi = a(k + 1, i) + a(j + 1, i) - yr = wkr * xr - wki * xi - yi = wkr * xi + wki * xr - a(k, i) = a(k, i) - yr - a(k + 1, i) = a(k + 1, i) - yi - a(j, i) = a(j, i) + yr - a(j + 1, i) = a(j + 1, i) - yi - end do - end do - end -! - subroutine rftfrow(n1max, n, n2, a, nc, c) - integer n1max, n, n2, nc, i, j, k, kk, ks - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & wkr, wki, xr, xi, yr, yi - ks = 4 * nc / n - do i = 0, n2 - 1 - kk = 0 - do k = n / 2 - 2, 2, -2 - j = n - k - kk = kk + ks - wkr = 0.5d0 - c(kk) - wki = c(nc - kk) - xr = a(k, i) - a(j, i) - xi = a(k + 1, i) + a(j + 1, i) - yr = wkr * xr + wki * xi - yi = wkr * xi - wki * xr - a(k, i) = a(k, i) - yr - a(k + 1, i) = a(k + 1, i) - yi - a(j, i) = a(j, i) + yr - a(j + 1, i) = a(j + 1, i) - yi - end do - end do - end -! - subroutine dctbsub(n1max, n1, n2, a, nc, c) - integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, - & k1, k2 - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i - ks1 = nc / n1 - ks2 = nc / n2 - n2h = n2 / 2 - kk2 = ks2 - do k2 = 1, n2h - 1 - j2 = n2 - k2 - w2r = 2 * c(kk2) - w2i = 2 * c(nc - kk2) - kk2 = kk2 + ks2 - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - x0r = w2r * c(kk1) - x0i = w2i * c(kk1) - x1r = w2r * c(nc - kk1) - x1i = w2i * c(nc - kk1) - wkr = x0r - x1i - wki = x0i + x1r - wji = x0r + x1i - wjr = x0i - x1r - kk1 = kk1 + ks1 - x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) - x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) - x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) - x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) - a(k1, k2) = x0r + x1i - a(k1 + 1, k2) = x0i - x1r - a(k1, j2) = x1r + x0i - a(k1 + 1, j2) = x1i - x0r - end do - wkr = w2r * 0.5d0 - wki = w2i * 0.5d0 - wjr = w2r * c(kk1) - wji = w2i * c(kk1) - x0r = a(0, k2) + a(0, j2) - x0i = a(1, k2) - a(1, j2) - x1r = a(0, k2) - a(0, j2) - x1i = a(1, k2) + a(1, j2) - a(0, k2) = wkr * x0r - wki * x0i - a(1, k2) = wkr * x0i + wki * x0r - a(0, j2) = -wjr * x1r + wji * x1i - a(1, j2) = wjr * x1i + wji * x1r - end do - w2r = 2 * c(kk2) - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - wkr = 2 * c(kk1) - wki = 2 * c(nc - kk1) - wjr = w2r * wkr - wji = w2r * wki - kk1 = kk1 + ks1 - x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) - a(k1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) - a(k1 + 1, 0) = x0i - x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) - a(k1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) - a(k1 + 1, n2h) = x0i - end do - a(1, 0) = a(1, 0) * w2r - a(0, n2h) = a(0, n2h) * w2r - a(1, n2h) = a(1, n2h) * 0.5d0 - end -! - subroutine dctfsub(n1max, n1, n2, a, nc, c) - integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, - & k1, k2 - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i - ks1 = nc / n1 - ks2 = nc / n2 - n2h = n2 / 2 - kk2 = ks2 - do k2 = 1, n2h - 1 - j2 = n2 - k2 - w2r = 2 * c(kk2) - w2i = 2 * c(nc - kk2) - kk2 = kk2 + ks2 - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - x0r = w2r * c(kk1) - x0i = w2i * c(kk1) - x1r = w2r * c(nc - kk1) - x1i = w2i * c(nc - kk1) - wkr = x0r - x1i - wki = x0i + x1r - wji = x0r + x1i - wjr = x0i - x1r - kk1 = kk1 + ks1 - x0r = a(k1, k2) - a(k1 + 1, j2) - x0i = a(k1, j2) + a(k1 + 1, k2) - x1r = a(k1, j2) - a(k1 + 1, k2) - x1i = a(k1, k2) + a(k1 + 1, j2) - a(k1, k2) = wkr * x0r + wki * x0i - a(k1 + 1, k2) = wkr * x0i - wki * x0r - a(k1, j2) = wjr * x1r + wji * x1i - a(k1 + 1, j2) = wjr * x1i - wji * x1r - end do - x0r = 2 * c(kk1) - wjr = x0r * w2r - wji = x0r * w2i - x0r = w2r * a(0, k2) + w2i * a(1, k2) - x0i = w2r * a(1, k2) - w2i * a(0, k2) - x1r = -wjr * a(0, j2) + wji * a(1, j2) - x1i = wjr * a(1, j2) + wji * a(0, j2) - a(0, k2) = x0r + x1r - a(1, k2) = x1i + x0i - a(0, j2) = x0r - x1r - a(1, j2) = x1i - x0i - end do - w2r = 2 * c(kk2) - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - wkr = 2 * c(kk1) - wki = 2 * c(nc - kk1) - wjr = w2r * wkr - wji = w2r * wki - kk1 = kk1 + ks1 - x0i = wkr * a(k1 + 1, 0) - wki * a(k1, 0) - a(k1, 0) = wkr * a(k1, 0) + wki * a(k1 + 1, 0) - a(k1 + 1, 0) = x0i - x0i = wjr * a(k1 + 1, n2h) - wji * a(k1, n2h) - a(k1, n2h) = wjr * a(k1, n2h) + wji * a(k1 + 1, n2h) - a(k1 + 1, n2h) = x0i - end do - w2r = w2r * 2 - a(0, 0) = a(0, 0) * 2 - a(1, 0) = a(1, 0) * w2r - a(0, n2h) = a(0, n2h) * w2r - end -! - subroutine dstbsub(n1max, n1, n2, a, nc, c) - integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, - & k1, k2 - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i - ks1 = nc / n1 - ks2 = nc / n2 - n2h = n2 / 2 - kk2 = ks2 - do k2 = 1, n2h - 1 - j2 = n2 - k2 - w2r = 2 * c(kk2) - w2i = 2 * c(nc - kk2) - kk2 = kk2 + ks2 - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - x0r = w2r * c(kk1) - x0i = w2i * c(kk1) - x1r = w2r * c(nc - kk1) - x1i = w2i * c(nc - kk1) - wkr = x0r - x1i - wki = x0i + x1r - wji = x0r + x1i - wjr = x0i - x1r - kk1 = kk1 + ks1 - x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) - x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) - x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) - x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) - a(k1, k2) = x1i - x0r - a(k1 + 1, k2) = x1r + x0i - a(k1, j2) = x0i - x1r - a(k1 + 1, j2) = x0r + x1i - end do - wkr = w2r * 0.5d0 - wki = w2i * 0.5d0 - wjr = w2r * c(kk1) - wji = w2i * c(kk1) - x0r = a(0, k2) + a(0, j2) - x0i = a(1, k2) - a(1, j2) - x1r = a(0, k2) - a(0, j2) - x1i = a(1, k2) + a(1, j2) - a(1, k2) = wkr * x0r - wki * x0i - a(0, k2) = wkr * x0i + wki * x0r - a(1, j2) = -wjr * x1r + wji * x1i - a(0, j2) = wjr * x1i + wji * x1r - end do - w2r = 2 * c(kk2) - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - wkr = 2 * c(kk1) - wki = 2 * c(nc - kk1) - wjr = w2r * wkr - wji = w2r * wki - kk1 = kk1 + ks1 - x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) - a(k1 + 1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) - a(k1, 0) = x0i - x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) - a(k1 + 1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) - a(k1, n2h) = x0i - end do - a(1, 0) = a(1, 0) * w2r - a(0, n2h) = a(0, n2h) * w2r - a(1, n2h) = a(1, n2h) * 0.5d0 - end -! - subroutine dstfsub(n1max, n1, n2, a, nc, c) - integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, - & k1, k2 - real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), - & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i - ks1 = nc / n1 - ks2 = nc / n2 - n2h = n2 / 2 - kk2 = ks2 - do k2 = 1, n2h - 1 - j2 = n2 - k2 - w2r = 2 * c(kk2) - w2i = 2 * c(nc - kk2) - kk2 = kk2 + ks2 - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - x0r = w2r * c(kk1) - x0i = w2i * c(kk1) - x1r = w2r * c(nc - kk1) - x1i = w2i * c(nc - kk1) - wkr = x0r - x1i - wki = x0i + x1r - wji = x0r + x1i - wjr = x0i - x1r - kk1 = kk1 + ks1 - x0r = a(k1 + 1, j2) - a(k1, k2) - x0i = a(k1 + 1, k2) + a(k1, j2) - x1r = a(k1 + 1, k2) - a(k1, j2) - x1i = a(k1 + 1, j2) + a(k1, k2) - a(k1, k2) = wkr * x0r + wki * x0i - a(k1 + 1, k2) = wkr * x0i - wki * x0r - a(k1, j2) = wjr * x1r + wji * x1i - a(k1 + 1, j2) = wjr * x1i - wji * x1r - end do - x0r = 2 * c(kk1) - wjr = x0r * w2r - wji = x0r * w2i - x0r = w2r * a(1, k2) + w2i * a(0, k2) - x0i = w2r * a(0, k2) - w2i * a(1, k2) - x1r = -wjr * a(1, j2) + wji * a(0, j2) - x1i = wjr * a(0, j2) + wji * a(1, j2) - a(0, k2) = x0r + x1r - a(1, k2) = x1i + x0i - a(0, j2) = x0r - x1r - a(1, j2) = x1i - x0i - end do - w2r = 2 * c(kk2) - kk1 = ks1 - do k1 = 2, n1 - 2, 2 - wkr = 2 * c(kk1) - wki = 2 * c(nc - kk1) - wjr = w2r * wkr - wji = w2r * wki - kk1 = kk1 + ks1 - x0i = wkr * a(k1, 0) - wki * a(k1 + 1, 0) - a(k1, 0) = wkr * a(k1 + 1, 0) + wki * a(k1, 0) - a(k1 + 1, 0) = x0i - x0i = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) - a(k1, n2h) = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) - a(k1 + 1, n2h) = x0i - end do - w2r = w2r * 2 - a(0, 0) = a(0, 0) * 2 - a(1, 0) = a(1, 0) * w2r - a(0, n2h) = a(0, n2h) * w2r - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg.c b/third_party/tensorflow_dependencies/fft2d/fftsg.c deleted file mode 100644 index 43d7534..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg.c +++ /dev/null
@@ -1,3314 +0,0 @@ -/* -Fast Fourier/Cosine/Sine Transform - dimension :one - data length :power of 2 - decimation :frequency - radix :split-radix - data :inplace - table :use -functions - cdft: Complex Discrete Fourier Transform - rdft: Real Discrete Fourier Transform - ddct: Discrete Cosine Transform - ddst: Discrete Sine Transform - dfct: Cosine Transform of RDFT (Real Symmetric DFT) - dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) -function prototypes - void cdft(int, int, double *, int *, double *); - void rdft(int, int, double *, int *, double *); - void ddct(int, int, double *, int *, double *); - void ddst(int, int, double *, int *, double *); - void dfct(int, double *, double *, int *, double *); - void dfst(int, double *, double *, int *, double *); -macro definitions - USE_CDFT_PTHREADS : default=not defined - CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 - CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 - USE_CDFT_WINTHREADS : default=not defined - CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 - CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 - - --------- Complex DFT (Discrete Fourier Transform) -------- - [definition] - <case1> - X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n - <case2> - X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n - (notes: sum_j=0^n-1 is a summation from j=0 to n-1) - [usage] - <case1> - ip[0] = 0; // first time only - cdft(2*n, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - cdft(2*n, -1, a, ip, w); - [parameters] - 2*n :data length (int) - n >= 1, n = power of 2 - a[0...2*n-1] :input/output data (double *) - input data - a[2*j] = Re(x[j]), - a[2*j+1] = Im(x[j]), 0<=j<n - output data - a[2*k] = Re(X[k]), - a[2*k+1] = Im(X[k]), 0<=k<n - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - strictly, - length of ip >= - 2+(1<<(int)(log(n+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n/2-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - cdft(2*n, -1, a, ip, w); - is - cdft(2*n, 1, a, ip, w); - for (j = 0; j <= 2 * n - 1; j++) { - a[j] *= 1.0 / n; - } - . - - --------- Real DFT / Inverse of Real DFT -------- - [definition] - <case1> RDFT - R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 - I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 - <case2> IRDFT (excluding scale) - a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + - sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + - sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n - [usage] - <case1> - ip[0] = 0; // first time only - rdft(n, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - rdft(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - <case1> - output data - a[2*k] = R[k], 0<=k<n/2 - a[2*k+1] = I[k], 0<k<n/2 - a[1] = R[n/2] - <case2> - input data - a[2*j] = R[j], 0<=j<n/2 - a[2*j+1] = I[j], 0<j<n/2 - a[1] = R[n/2] - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n/2-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - rdft(n, 1, a, ip, w); - is - rdft(n, -1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> IDCT (excluding scale) - C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n - <case2> DCT - C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n - [usage] - <case1> - ip[0] = 0; // first time only - ddct(n, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - ddct(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - output data - a[k] = C[k], 0<=k<n - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/4-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddct(n, -1, a, ip, w); - is - a[0] *= 0.5; - ddct(n, 1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- DST (Discrete Sine Transform) / Inverse of DST -------- - [definition] - <case1> IDST (excluding scale) - S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n - <case2> DST - S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n - [usage] - <case1> - ip[0] = 0; // first time only - ddst(n, 1, a, ip, w); - <case2> - ip[0] = 0; // first time only - ddst(n, -1, a, ip, w); - [parameters] - n :data length (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - <case1> - input data - a[j] = A[j], 0<j<n - a[0] = A[n] - output data - a[k] = S[k], 0<=k<n - <case2> - output data - a[k] = S[k], 0<k<n - a[0] = S[n] - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/2) - strictly, - length of ip >= - 2+(1<<(int)(log(n/2+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/4-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddst(n, -1, a, ip, w); - is - a[0] *= 0.5; - ddst(n, 1, a, ip, w); - for (j = 0; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - --------- Cosine Transform of RDFT (Real Symmetric DFT) -------- - [definition] - C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n - [usage] - ip[0] = 0; // first time only - dfct(n, a, t, ip, w); - [parameters] - n :data length - 1 (int) - n >= 2, n = power of 2 - a[0...n] :input/output data (double *) - output data - a[k] = C[k], 0<=k<=n - t[0...n/2] :work area (double *) - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/4) - strictly, - length of ip >= - 2+(1<<(int)(log(n/4+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/8-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - a[0] *= 0.5; - a[n] *= 0.5; - dfct(n, a, t, ip, w); - is - a[0] *= 0.5; - a[n] *= 0.5; - dfct(n, a, t, ip, w); - for (j = 0; j <= n; j++) { - a[j] *= 2.0 / n; - } - . - - --------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- - [definition] - S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n - [usage] - ip[0] = 0; // first time only - dfst(n, a, t, ip, w); - [parameters] - n :data length + 1 (int) - n >= 2, n = power of 2 - a[0...n-1] :input/output data (double *) - output data - a[k] = S[k], 0<k<n - (a[0] is used for work area) - t[0...n/2-1] :work area (double *) - ip[0...*] :work area for bit reversal (int *) - length of ip >= 2+sqrt(n/4) - strictly, - length of ip >= - 2+(1<<(int)(log(n/4+0.5)/log(2))/2). - ip[0],ip[1] are pointers of the cos/sin table. - w[0...n*5/8-1] :cos/sin table (double *) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - dfst(n, a, t, ip, w); - is - dfst(n, a, t, ip, w); - for (j = 1; j <= n - 1; j++) { - a[j] *= 2.0 / n; - } - . - - -Appendix : - The cos/sin table is recalculated when the larger table required. - w[] and ip[] are compatible with all routines. -*/ - - -void cdft(int n, int isgn, double *a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void cftbsub(int n, double *a, int *ip, int nw, double *w); - int nw; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - if (isgn >= 0) { - cftfsub(n, a, ip, nw, w); - } else { - cftbsub(n, a, ip, nw, w); - } -} - - -void rdft(int n, int isgn, double *a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void cftbsub(int n, double *a, int *ip, int nw, double *w); - void rftfsub(int n, double *a, int nc, double *c); - void rftbsub(int n, double *a, int nc, double *c); - int nw, nc; - double xi; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 2)) { - nc = n >> 2; - makect(nc, ip, w + nw); - } - if (isgn >= 0) { - if (n > 4) { - cftfsub(n, a, ip, nw, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, ip, nw, w); - } - xi = a[0] - a[1]; - a[0] += a[1]; - a[1] = xi; - } else { - a[1] = 0.5 * (a[0] - a[1]); - a[0] -= a[1]; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - cftbsub(n, a, ip, nw, w); - } else if (n == 4) { - cftbsub(n, a, ip, nw, w); - } - } -} - - -void ddct(int n, int isgn, double *a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void cftbsub(int n, double *a, int *ip, int nw, double *w); - void rftfsub(int n, double *a, int nc, double *c); - void rftbsub(int n, double *a, int nc, double *c); - void dctsub(int n, double *a, int nc, double *c); - int j, nw, nc; - double xr; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - if (isgn < 0) { - xr = a[n - 1]; - for (j = n - 2; j >= 2; j -= 2) { - a[j + 1] = a[j] - a[j - 1]; - a[j] += a[j - 1]; - } - a[1] = a[0] - xr; - a[0] += xr; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - cftbsub(n, a, ip, nw, w); - } else if (n == 4) { - cftbsub(n, a, ip, nw, w); - } - } - dctsub(n, a, nc, w + nw); - if (isgn >= 0) { - if (n > 4) { - cftfsub(n, a, ip, nw, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, ip, nw, w); - } - xr = a[0] - a[1]; - a[0] += a[1]; - for (j = 2; j < n; j += 2) { - a[j - 1] = a[j] - a[j + 1]; - a[j] += a[j + 1]; - } - a[n - 1] = xr; - } -} - - -void ddst(int n, int isgn, double *a, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void cftbsub(int n, double *a, int *ip, int nw, double *w); - void rftfsub(int n, double *a, int nc, double *c); - void rftbsub(int n, double *a, int nc, double *c); - void dstsub(int n, double *a, int nc, double *c); - int j, nw, nc; - double xr; - - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - if (isgn < 0) { - xr = a[n - 1]; - for (j = n - 2; j >= 2; j -= 2) { - a[j + 1] = -a[j] - a[j - 1]; - a[j] -= a[j - 1]; - } - a[1] = a[0] + xr; - a[0] -= xr; - if (n > 4) { - rftbsub(n, a, nc, w + nw); - cftbsub(n, a, ip, nw, w); - } else if (n == 4) { - cftbsub(n, a, ip, nw, w); - } - } - dstsub(n, a, nc, w + nw); - if (isgn >= 0) { - if (n > 4) { - cftfsub(n, a, ip, nw, w); - rftfsub(n, a, nc, w + nw); - } else if (n == 4) { - cftfsub(n, a, ip, nw, w); - } - xr = a[0] - a[1]; - a[0] += a[1]; - for (j = 2; j < n; j += 2) { - a[j - 1] = -a[j] - a[j + 1]; - a[j] -= a[j + 1]; - } - a[n - 1] = -xr; - } -} - - -void dfct(int n, double *a, double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void rftfsub(int n, double *a, int nc, double *c); - void dctsub(int n, double *a, int nc, double *c); - int j, k, l, m, mh, nw, nc; - double xr, xi, yr, yi; - - nw = ip[0]; - if (n > (nw << 3)) { - nw = n >> 3; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 1)) { - nc = n >> 1; - makect(nc, ip, w + nw); - } - m = n >> 1; - yi = a[m]; - xi = a[0] + a[n]; - a[0] -= a[n]; - t[0] = xi - yi; - t[m] = xi + yi; - if (n > 2) { - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - xr = a[j] - a[n - j]; - xi = a[j] + a[n - j]; - yr = a[k] - a[n - k]; - yi = a[k] + a[n - k]; - a[j] = xr; - a[k] = yr; - t[j] = xi - yi; - t[k] = xi + yi; - } - t[mh] = a[mh] + a[n - mh]; - a[mh] -= a[n - mh]; - dctsub(m, a, nc, w + nw); - if (m > 4) { - cftfsub(m, a, ip, nw, w); - rftfsub(m, a, nc, w + nw); - } else if (m == 4) { - cftfsub(m, a, ip, nw, w); - } - a[n - 1] = a[0] - a[1]; - a[1] = a[0] + a[1]; - for (j = m - 2; j >= 2; j -= 2) { - a[2 * j + 1] = a[j] + a[j + 1]; - a[2 * j - 1] = a[j] - a[j + 1]; - } - l = 2; - m = mh; - while (m >= 2) { - dctsub(m, t, nc, w + nw); - if (m > 4) { - cftfsub(m, t, ip, nw, w); - rftfsub(m, t, nc, w + nw); - } else if (m == 4) { - cftfsub(m, t, ip, nw, w); - } - a[n - l] = t[0] - t[1]; - a[l] = t[0] + t[1]; - k = 0; - for (j = 2; j < m; j += 2) { - k += l << 2; - a[k - l] = t[j] - t[j + 1]; - a[k + l] = t[j] + t[j + 1]; - } - l <<= 1; - mh = m >> 1; - for (j = 0; j < mh; j++) { - k = m - j; - t[j] = t[m + k] - t[m + j]; - t[k] = t[m + k] + t[m + j]; - } - t[mh] = t[m + mh]; - m = mh; - } - a[l] = t[0]; - a[n] = t[2] - t[1]; - a[0] = t[2] + t[1]; - } else { - a[1] = a[0]; - a[2] = t[0]; - a[0] = t[1]; - } -} - - -void dfst(int n, double *a, double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void cftfsub(int n, double *a, int *ip, int nw, double *w); - void rftfsub(int n, double *a, int nc, double *c); - void dstsub(int n, double *a, int nc, double *c); - int j, k, l, m, mh, nw, nc; - double xr, xi, yr, yi; - - nw = ip[0]; - if (n > (nw << 3)) { - nw = n >> 3; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > (nc << 1)) { - nc = n >> 1; - makect(nc, ip, w + nw); - } - if (n > 2) { - m = n >> 1; - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - xr = a[j] + a[n - j]; - xi = a[j] - a[n - j]; - yr = a[k] + a[n - k]; - yi = a[k] - a[n - k]; - a[j] = xr; - a[k] = yr; - t[j] = xi + yi; - t[k] = xi - yi; - } - t[0] = a[mh] - a[n - mh]; - a[mh] += a[n - mh]; - a[0] = a[m]; - dstsub(m, a, nc, w + nw); - if (m > 4) { - cftfsub(m, a, ip, nw, w); - rftfsub(m, a, nc, w + nw); - } else if (m == 4) { - cftfsub(m, a, ip, nw, w); - } - a[n - 1] = a[1] - a[0]; - a[1] = a[0] + a[1]; - for (j = m - 2; j >= 2; j -= 2) { - a[2 * j + 1] = a[j] - a[j + 1]; - a[2 * j - 1] = -a[j] - a[j + 1]; - } - l = 2; - m = mh; - while (m >= 2) { - dstsub(m, t, nc, w + nw); - if (m > 4) { - cftfsub(m, t, ip, nw, w); - rftfsub(m, t, nc, w + nw); - } else if (m == 4) { - cftfsub(m, t, ip, nw, w); - } - a[n - l] = t[1] - t[0]; - a[l] = t[0] + t[1]; - k = 0; - for (j = 2; j < m; j += 2) { - k += l << 2; - a[k - l] = -t[j] - t[j + 1]; - a[k + l] = t[j] - t[j + 1]; - } - l <<= 1; - mh = m >> 1; - for (j = 1; j < mh; j++) { - k = m - j; - t[j] = t[m + k] + t[m + j]; - t[k] = t[m + k] - t[m + j]; - } - t[0] = t[m + mh]; - m = mh; - } - a[l] = t[0]; - } - a[0] = 0; -} - - -/* -------- initializing routines -------- */ - - -#include <math.h> - -void makewt(int nw, int *ip, double *w) -{ - void makeipt(int nw, int *ip); - int j, nwh, nw0, nw1; - double delta, wn4r, wk1r, wk1i, wk3r, wk3i; - - ip[0] = nw; - ip[1] = 1; - if (nw > 2) { - nwh = nw >> 1; - delta = atan(1.0) / nwh; - wn4r = cos(delta * nwh); - w[0] = 1; - w[1] = wn4r; - if (nwh == 4) { - w[2] = cos(delta * 2); - w[3] = sin(delta * 2); - } else if (nwh > 4) { - makeipt(nw, ip); - w[2] = 0.5 / cos(delta * 2); - w[3] = 0.5 / cos(delta * 6); - for (j = 4; j < nwh; j += 4) { - w[j] = cos(delta * j); - w[j + 1] = sin(delta * j); - w[j + 2] = cos(3 * delta * j); - w[j + 3] = -sin(3 * delta * j); - } - } - nw0 = 0; - while (nwh > 2) { - nw1 = nw0 + nwh; - nwh >>= 1; - w[nw1] = 1; - w[nw1 + 1] = wn4r; - if (nwh == 4) { - wk1r = w[nw0 + 4]; - wk1i = w[nw0 + 5]; - w[nw1 + 2] = wk1r; - w[nw1 + 3] = wk1i; - } else if (nwh > 4) { - wk1r = w[nw0 + 4]; - wk3r = w[nw0 + 6]; - w[nw1 + 2] = 0.5 / wk1r; - w[nw1 + 3] = 0.5 / wk3r; - for (j = 4; j < nwh; j += 4) { - wk1r = w[nw0 + 2 * j]; - wk1i = w[nw0 + 2 * j + 1]; - wk3r = w[nw0 + 2 * j + 2]; - wk3i = w[nw0 + 2 * j + 3]; - w[nw1 + j] = wk1r; - w[nw1 + j + 1] = wk1i; - w[nw1 + j + 2] = wk3r; - w[nw1 + j + 3] = wk3i; - } - } - nw0 = nw1; - } - } -} - - -void makeipt(int nw, int *ip) -{ - int j, l, m, m2, p, q; - - ip[2] = 0; - ip[3] = 16; - m = 2; - for (l = nw; l > 32; l >>= 2) { - m2 = m << 1; - q = m2 << 3; - for (j = m; j < m2; j++) { - p = ip[j] << 2; - ip[m + j] = p; - ip[m2 + j] = p + q; - } - m = m2; - } -} - - -void makect(int nc, int *ip, double *c) -{ - int j, nch; - double delta; - - ip[1] = nc; - if (nc > 1) { - nch = nc >> 1; - delta = atan(1.0) / nch; - c[0] = cos(delta * nch); - c[nch] = 0.5 * c[0]; - for (j = 1; j < nch; j++) { - c[j] = 0.5 * cos(delta * j); - c[nc - j] = 0.5 * sin(delta * j); - } - } -} - - -/* -------- child routines -------- */ - - -#ifdef USE_CDFT_PTHREADS -#define USE_CDFT_THREADS -#ifndef CDFT_THREADS_BEGIN_N -#define CDFT_THREADS_BEGIN_N 8192 -#endif -#ifndef CDFT_4THREADS_BEGIN_N -#define CDFT_4THREADS_BEGIN_N 65536 -#endif -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#define cdft_thread_t pthread_t -#define cdft_thread_create(thp,func,argp) { \ - if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ - fprintf(stderr, "cdft thread error\n"); \ - exit(1); \ - } \ -} -#define cdft_thread_wait(th) { \ - if (pthread_join(th, NULL) != 0) { \ - fprintf(stderr, "cdft thread error\n"); \ - exit(1); \ - } \ -} -#endif /* USE_CDFT_PTHREADS */ - - -#ifdef USE_CDFT_WINTHREADS -#define USE_CDFT_THREADS -#ifndef CDFT_THREADS_BEGIN_N -#define CDFT_THREADS_BEGIN_N 32768 -#endif -#ifndef CDFT_4THREADS_BEGIN_N -#define CDFT_4THREADS_BEGIN_N 524288 -#endif -#include <windows.h> -#include <stdio.h> -#include <stdlib.h> -#define cdft_thread_t HANDLE -#define cdft_thread_create(thp,func,argp) { \ - DWORD thid; \ - *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ - if (*(thp) == 0) { \ - fprintf(stderr, "cdft thread error\n"); \ - exit(1); \ - } \ -} -#define cdft_thread_wait(th) { \ - WaitForSingleObject(th, INFINITE); \ - CloseHandle(th); \ -} -#endif /* USE_CDFT_WINTHREADS */ - - -void cftfsub(int n, double *a, int *ip, int nw, double *w) -{ - void bitrv2(int n, int *ip, double *a); - void bitrv216(double *a); - void bitrv208(double *a); - void cftf1st(int n, double *a, double *w); - void cftrec4(int n, double *a, int nw, double *w); - void cftleaf(int n, int isplt, double *a, int nw, double *w); - void cftfx41(int n, double *a, int nw, double *w); - void cftf161(double *a, double *w); - void cftf081(double *a, double *w); - void cftf040(double *a); - void cftx020(double *a); -#ifdef USE_CDFT_THREADS - void cftrec4_th(int n, double *a, int nw, double *w); -#endif /* USE_CDFT_THREADS */ - - if (n > 8) { - if (n > 32) { - cftf1st(n, a, &w[nw - (n >> 2)]); -#ifdef USE_CDFT_THREADS - if (n > CDFT_THREADS_BEGIN_N) { - cftrec4_th(n, a, nw, w); - } else -#endif /* USE_CDFT_THREADS */ - if (n > 512) { - cftrec4(n, a, nw, w); - } else if (n > 128) { - cftleaf(n, 1, a, nw, w); - } else { - cftfx41(n, a, nw, w); - } - bitrv2(n, ip, a); - } else if (n == 32) { - cftf161(a, &w[nw - 8]); - bitrv216(a); - } else { - cftf081(a, w); - bitrv208(a); - } - } else if (n == 8) { - cftf040(a); - } else if (n == 4) { - cftx020(a); - } -} - - -void cftbsub(int n, double *a, int *ip, int nw, double *w) -{ - void bitrv2conj(int n, int *ip, double *a); - void bitrv216neg(double *a); - void bitrv208neg(double *a); - void cftb1st(int n, double *a, double *w); - void cftrec4(int n, double *a, int nw, double *w); - void cftleaf(int n, int isplt, double *a, int nw, double *w); - void cftfx41(int n, double *a, int nw, double *w); - void cftf161(double *a, double *w); - void cftf081(double *a, double *w); - void cftb040(double *a); - void cftx020(double *a); -#ifdef USE_CDFT_THREADS - void cftrec4_th(int n, double *a, int nw, double *w); -#endif /* USE_CDFT_THREADS */ - - if (n > 8) { - if (n > 32) { - cftb1st(n, a, &w[nw - (n >> 2)]); -#ifdef USE_CDFT_THREADS - if (n > CDFT_THREADS_BEGIN_N) { - cftrec4_th(n, a, nw, w); - } else -#endif /* USE_CDFT_THREADS */ - if (n > 512) { - cftrec4(n, a, nw, w); - } else if (n > 128) { - cftleaf(n, 1, a, nw, w); - } else { - cftfx41(n, a, nw, w); - } - bitrv2conj(n, ip, a); - } else if (n == 32) { - cftf161(a, &w[nw - 8]); - bitrv216neg(a); - } else { - cftf081(a, w); - bitrv208neg(a); - } - } else if (n == 8) { - cftb040(a); - } else if (n == 4) { - cftx020(a); - } -} - - -void bitrv2(int n, int *ip, double *a) -{ - int j, j1, k, k1, l, m, nh, nm; - double xr, xi, yr, yi; - - m = 1; - for (l = n >> 2; l > 8; l >>= 2) { - m <<= 1; - } - nh = n >> 1; - nm = 4 * m; - if (l == 8) { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 4 * j + 2 * ip[m + k]; - k1 = 4 * k + 2 * ip[m + j]; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh; - k1 += 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 += nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += 2; - k1 += nh; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh; - k1 -= 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 += nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 4 * k + 2 * ip[m + k]; - j1 = k1 + 2; - k1 += nh; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= 2; - k1 -= nh; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh + 2; - k1 += nh + 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh - nm; - k1 += 2 * nm - 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - } else { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 4 * j + ip[m + k]; - k1 = 4 * k + ip[m + j]; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh; - k1 += 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += 2; - k1 += nh; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh; - k1 -= 2; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 4 * k + ip[m + k]; - j1 = k1 + 2; - k1 += nh; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += nm; - xr = a[j1]; - xi = a[j1 + 1]; - yr = a[k1]; - yi = a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - } -} - - -void bitrv2conj(int n, int *ip, double *a) -{ - int j, j1, k, k1, l, m, nh, nm; - double xr, xi, yr, yi; - - m = 1; - for (l = n >> 2; l > 8; l >>= 2) { - m <<= 1; - } - nh = n >> 1; - nm = 4 * m; - if (l == 8) { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 4 * j + 2 * ip[m + k]; - k1 = 4 * k + 2 * ip[m + j]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh; - k1 += 2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 += nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += 2; - k1 += nh; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh; - k1 -= 2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 += nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 4 * k + 2 * ip[m + k]; - j1 = k1 + 2; - k1 += nh; - a[j1 - 1] = -a[j1 - 1]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - a[k1 + 3] = -a[k1 + 3]; - j1 += nm; - k1 += 2 * nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 -= nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= 2; - k1 -= nh; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh + 2; - k1 += nh + 2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh - nm; - k1 += 2 * nm - 2; - a[j1 - 1] = -a[j1 - 1]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - a[k1 + 3] = -a[k1 + 3]; - } - } else { - for (k = 0; k < m; k++) { - for (j = 0; j < k; j++) { - j1 = 4 * j + ip[m + k]; - k1 = 4 * k + ip[m + j]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nh; - k1 += 2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += 2; - k1 += nh; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 += nm; - k1 += nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nh; - k1 -= 2; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - j1 -= nm; - k1 -= nm; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - } - k1 = 4 * k + ip[m + k]; - j1 = k1 + 2; - k1 += nh; - a[j1 - 1] = -a[j1 - 1]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - a[k1 + 3] = -a[k1 + 3]; - j1 += nm; - k1 += nm; - a[j1 - 1] = -a[j1 - 1]; - xr = a[j1]; - xi = -a[j1 + 1]; - yr = a[k1]; - yi = -a[k1 + 1]; - a[j1] = yr; - a[j1 + 1] = yi; - a[k1] = xr; - a[k1 + 1] = xi; - a[k1 + 3] = -a[k1 + 3]; - } - } -} - - -void bitrv216(double *a) -{ - double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, - x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, - x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; - - x1r = a[2]; - x1i = a[3]; - x2r = a[4]; - x2i = a[5]; - x3r = a[6]; - x3i = a[7]; - x4r = a[8]; - x4i = a[9]; - x5r = a[10]; - x5i = a[11]; - x7r = a[14]; - x7i = a[15]; - x8r = a[16]; - x8i = a[17]; - x10r = a[20]; - x10i = a[21]; - x11r = a[22]; - x11i = a[23]; - x12r = a[24]; - x12i = a[25]; - x13r = a[26]; - x13i = a[27]; - x14r = a[28]; - x14i = a[29]; - a[2] = x8r; - a[3] = x8i; - a[4] = x4r; - a[5] = x4i; - a[6] = x12r; - a[7] = x12i; - a[8] = x2r; - a[9] = x2i; - a[10] = x10r; - a[11] = x10i; - a[14] = x14r; - a[15] = x14i; - a[16] = x1r; - a[17] = x1i; - a[20] = x5r; - a[21] = x5i; - a[22] = x13r; - a[23] = x13i; - a[24] = x3r; - a[25] = x3i; - a[26] = x11r; - a[27] = x11i; - a[28] = x7r; - a[29] = x7i; -} - - -void bitrv216neg(double *a) -{ - double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, - x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, - x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, - x13r, x13i, x14r, x14i, x15r, x15i; - - x1r = a[2]; - x1i = a[3]; - x2r = a[4]; - x2i = a[5]; - x3r = a[6]; - x3i = a[7]; - x4r = a[8]; - x4i = a[9]; - x5r = a[10]; - x5i = a[11]; - x6r = a[12]; - x6i = a[13]; - x7r = a[14]; - x7i = a[15]; - x8r = a[16]; - x8i = a[17]; - x9r = a[18]; - x9i = a[19]; - x10r = a[20]; - x10i = a[21]; - x11r = a[22]; - x11i = a[23]; - x12r = a[24]; - x12i = a[25]; - x13r = a[26]; - x13i = a[27]; - x14r = a[28]; - x14i = a[29]; - x15r = a[30]; - x15i = a[31]; - a[2] = x15r; - a[3] = x15i; - a[4] = x7r; - a[5] = x7i; - a[6] = x11r; - a[7] = x11i; - a[8] = x3r; - a[9] = x3i; - a[10] = x13r; - a[11] = x13i; - a[12] = x5r; - a[13] = x5i; - a[14] = x9r; - a[15] = x9i; - a[16] = x1r; - a[17] = x1i; - a[18] = x14r; - a[19] = x14i; - a[20] = x6r; - a[21] = x6i; - a[22] = x10r; - a[23] = x10i; - a[24] = x2r; - a[25] = x2i; - a[26] = x12r; - a[27] = x12i; - a[28] = x4r; - a[29] = x4i; - a[30] = x8r; - a[31] = x8i; -} - - -void bitrv208(double *a) -{ - double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; - - x1r = a[2]; - x1i = a[3]; - x3r = a[6]; - x3i = a[7]; - x4r = a[8]; - x4i = a[9]; - x6r = a[12]; - x6i = a[13]; - a[2] = x4r; - a[3] = x4i; - a[6] = x6r; - a[7] = x6i; - a[8] = x1r; - a[9] = x1i; - a[12] = x3r; - a[13] = x3i; -} - - -void bitrv208neg(double *a) -{ - double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, - x5r, x5i, x6r, x6i, x7r, x7i; - - x1r = a[2]; - x1i = a[3]; - x2r = a[4]; - x2i = a[5]; - x3r = a[6]; - x3i = a[7]; - x4r = a[8]; - x4i = a[9]; - x5r = a[10]; - x5i = a[11]; - x6r = a[12]; - x6i = a[13]; - x7r = a[14]; - x7i = a[15]; - a[2] = x7r; - a[3] = x7i; - a[4] = x3r; - a[5] = x3i; - a[6] = x5r; - a[7] = x5i; - a[8] = x1r; - a[9] = x1i; - a[10] = x6r; - a[11] = x6i; - a[12] = x2r; - a[13] = x2i; - a[14] = x4r; - a[15] = x4i; -} - - -void cftf1st(int n, double *a, double *w) -{ - int j, j0, j1, j2, j3, k, m, mh; - double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, - wd1r, wd1i, wd3r, wd3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; - - mh = n >> 3; - m = 2 * mh; - j1 = m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[0] + a[j2]; - x0i = a[1] + a[j2 + 1]; - x1r = a[0] - a[j2]; - x1i = a[1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - a[j2] = x1r - x3i; - a[j2 + 1] = x1i + x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i - x3r; - wn4r = w[1]; - csc1 = w[2]; - csc3 = w[3]; - wd1r = 1; - wd1i = 0; - wd3r = 1; - wd3i = 0; - k = 0; - for (j = 2; j < mh - 2; j += 4) { - k += 4; - wk1r = csc1 * (wd1r + w[k]); - wk1i = csc1 * (wd1i + w[k + 1]); - wk3r = csc3 * (wd3r + w[k + 2]); - wk3i = csc3 * (wd3i + w[k + 3]); - wd1r = w[k]; - wd1i = w[k + 1]; - wd3r = w[k + 2]; - wd3i = w[k + 3]; - j1 = j + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j] + a[j2]; - x0i = a[j + 1] + a[j2 + 1]; - x1r = a[j] - a[j2]; - x1i = a[j + 1] - a[j2 + 1]; - y0r = a[j + 2] + a[j2 + 2]; - y0i = a[j + 3] + a[j2 + 3]; - y1r = a[j + 2] - a[j2 + 2]; - y1i = a[j + 3] - a[j2 + 3]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - y2r = a[j1 + 2] + a[j3 + 2]; - y2i = a[j1 + 3] + a[j3 + 3]; - y3r = a[j1 + 2] - a[j3 + 2]; - y3i = a[j1 + 3] - a[j3 + 3]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j + 2] = y0r + y2r; - a[j + 3] = y0i + y2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - a[j1 + 2] = y0r - y2r; - a[j1 + 3] = y0i - y2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wk1r * x0r - wk1i * x0i; - a[j2 + 1] = wk1r * x0i + wk1i * x0r; - x0r = y1r - y3i; - x0i = y1i + y3r; - a[j2 + 2] = wd1r * x0r - wd1i * x0i; - a[j2 + 3] = wd1r * x0i + wd1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3r * x0r + wk3i * x0i; - a[j3 + 1] = wk3r * x0i - wk3i * x0r; - x0r = y1r + y3i; - x0i = y1i - y3r; - a[j3 + 2] = wd3r * x0r + wd3i * x0i; - a[j3 + 3] = wd3r * x0i - wd3i * x0r; - j0 = m - j; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] + a[j2]; - x0i = a[j0 + 1] + a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = a[j0 + 1] - a[j2 + 1]; - y0r = a[j0 - 2] + a[j2 - 2]; - y0i = a[j0 - 1] + a[j2 - 1]; - y1r = a[j0 - 2] - a[j2 - 2]; - y1i = a[j0 - 1] - a[j2 - 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - y2r = a[j1 - 2] + a[j3 - 2]; - y2i = a[j1 - 1] + a[j3 - 1]; - y3r = a[j1 - 2] - a[j3 - 2]; - y3i = a[j1 - 1] - a[j3 - 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i + x2i; - a[j0 - 2] = y0r + y2r; - a[j0 - 1] = y0i + y2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - a[j1 - 2] = y0r - y2r; - a[j1 - 1] = y0i - y2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wk1i * x0r - wk1r * x0i; - a[j2 + 1] = wk1i * x0i + wk1r * x0r; - x0r = y1r - y3i; - x0i = y1i + y3r; - a[j2 - 2] = wd1i * x0r - wd1r * x0i; - a[j2 - 1] = wd1i * x0i + wd1r * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3i * x0r + wk3r * x0i; - a[j3 + 1] = wk3i * x0i - wk3r * x0r; - x0r = y1r + y3i; - x0i = y1i - y3r; - a[j3 - 2] = wd3i * x0r + wd3r * x0i; - a[j3 - 1] = wd3i * x0i - wd3r * x0r; - } - wk1r = csc1 * (wd1r + wn4r); - wk1i = csc1 * (wd1i + wn4r); - wk3r = csc3 * (wd3r - wn4r); - wk3i = csc3 * (wd3i - wn4r); - j0 = mh; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0 - 2] + a[j2 - 2]; - x0i = a[j0 - 1] + a[j2 - 1]; - x1r = a[j0 - 2] - a[j2 - 2]; - x1i = a[j0 - 1] - a[j2 - 1]; - x2r = a[j1 - 2] + a[j3 - 2]; - x2i = a[j1 - 1] + a[j3 - 1]; - x3r = a[j1 - 2] - a[j3 - 2]; - x3i = a[j1 - 1] - a[j3 - 1]; - a[j0 - 2] = x0r + x2r; - a[j0 - 1] = x0i + x2i; - a[j1 - 2] = x0r - x2r; - a[j1 - 1] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2 - 2] = wk1r * x0r - wk1i * x0i; - a[j2 - 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3 - 2] = wk3r * x0r + wk3i * x0i; - a[j3 - 1] = wk3r * x0i - wk3i * x0r; - x0r = a[j0] + a[j2]; - x0i = a[j0 + 1] + a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = a[j0 + 1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wn4r * (x0r - x0i); - a[j2 + 1] = wn4r * (x0i + x0r); - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = -wn4r * (x0r + x0i); - a[j3 + 1] = -wn4r * (x0i - x0r); - x0r = a[j0 + 2] + a[j2 + 2]; - x0i = a[j0 + 3] + a[j2 + 3]; - x1r = a[j0 + 2] - a[j2 + 2]; - x1i = a[j0 + 3] - a[j2 + 3]; - x2r = a[j1 + 2] + a[j3 + 2]; - x2i = a[j1 + 3] + a[j3 + 3]; - x3r = a[j1 + 2] - a[j3 + 2]; - x3i = a[j1 + 3] - a[j3 + 3]; - a[j0 + 2] = x0r + x2r; - a[j0 + 3] = x0i + x2i; - a[j1 + 2] = x0r - x2r; - a[j1 + 3] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2 + 2] = wk1i * x0r - wk1r * x0i; - a[j2 + 3] = wk1i * x0i + wk1r * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3 + 2] = wk3i * x0r + wk3r * x0i; - a[j3 + 3] = wk3i * x0i - wk3r * x0r; -} - - -void cftb1st(int n, double *a, double *w) -{ - int j, j0, j1, j2, j3, k, m, mh; - double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, - wd1r, wd1i, wd3r, wd3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; - - mh = n >> 3; - m = 2 * mh; - j1 = m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[0] + a[j2]; - x0i = -a[1] - a[j2 + 1]; - x1r = a[0] - a[j2]; - x1i = -a[1] + a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[0] = x0r + x2r; - a[1] = x0i - x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i + x2i; - a[j2] = x1r + x3i; - a[j2 + 1] = x1i + x3r; - a[j3] = x1r - x3i; - a[j3 + 1] = x1i - x3r; - wn4r = w[1]; - csc1 = w[2]; - csc3 = w[3]; - wd1r = 1; - wd1i = 0; - wd3r = 1; - wd3i = 0; - k = 0; - for (j = 2; j < mh - 2; j += 4) { - k += 4; - wk1r = csc1 * (wd1r + w[k]); - wk1i = csc1 * (wd1i + w[k + 1]); - wk3r = csc3 * (wd3r + w[k + 2]); - wk3i = csc3 * (wd3i + w[k + 3]); - wd1r = w[k]; - wd1i = w[k + 1]; - wd3r = w[k + 2]; - wd3i = w[k + 3]; - j1 = j + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j] + a[j2]; - x0i = -a[j + 1] - a[j2 + 1]; - x1r = a[j] - a[j2]; - x1i = -a[j + 1] + a[j2 + 1]; - y0r = a[j + 2] + a[j2 + 2]; - y0i = -a[j + 3] - a[j2 + 3]; - y1r = a[j + 2] - a[j2 + 2]; - y1i = -a[j + 3] + a[j2 + 3]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - y2r = a[j1 + 2] + a[j3 + 2]; - y2i = a[j1 + 3] + a[j3 + 3]; - y3r = a[j1 + 2] - a[j3 + 2]; - y3i = a[j1 + 3] - a[j3 + 3]; - a[j] = x0r + x2r; - a[j + 1] = x0i - x2i; - a[j + 2] = y0r + y2r; - a[j + 3] = y0i - y2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i + x2i; - a[j1 + 2] = y0r - y2r; - a[j1 + 3] = y0i + y2i; - x0r = x1r + x3i; - x0i = x1i + x3r; - a[j2] = wk1r * x0r - wk1i * x0i; - a[j2 + 1] = wk1r * x0i + wk1i * x0r; - x0r = y1r + y3i; - x0i = y1i + y3r; - a[j2 + 2] = wd1r * x0r - wd1i * x0i; - a[j2 + 3] = wd1r * x0i + wd1i * x0r; - x0r = x1r - x3i; - x0i = x1i - x3r; - a[j3] = wk3r * x0r + wk3i * x0i; - a[j3 + 1] = wk3r * x0i - wk3i * x0r; - x0r = y1r - y3i; - x0i = y1i - y3r; - a[j3 + 2] = wd3r * x0r + wd3i * x0i; - a[j3 + 3] = wd3r * x0i - wd3i * x0r; - j0 = m - j; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] + a[j2]; - x0i = -a[j0 + 1] - a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = -a[j0 + 1] + a[j2 + 1]; - y0r = a[j0 - 2] + a[j2 - 2]; - y0i = -a[j0 - 1] - a[j2 - 1]; - y1r = a[j0 - 2] - a[j2 - 2]; - y1i = -a[j0 - 1] + a[j2 - 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - y2r = a[j1 - 2] + a[j3 - 2]; - y2i = a[j1 - 1] + a[j3 - 1]; - y3r = a[j1 - 2] - a[j3 - 2]; - y3i = a[j1 - 1] - a[j3 - 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i - x2i; - a[j0 - 2] = y0r + y2r; - a[j0 - 1] = y0i - y2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i + x2i; - a[j1 - 2] = y0r - y2r; - a[j1 - 1] = y0i + y2i; - x0r = x1r + x3i; - x0i = x1i + x3r; - a[j2] = wk1i * x0r - wk1r * x0i; - a[j2 + 1] = wk1i * x0i + wk1r * x0r; - x0r = y1r + y3i; - x0i = y1i + y3r; - a[j2 - 2] = wd1i * x0r - wd1r * x0i; - a[j2 - 1] = wd1i * x0i + wd1r * x0r; - x0r = x1r - x3i; - x0i = x1i - x3r; - a[j3] = wk3i * x0r + wk3r * x0i; - a[j3 + 1] = wk3i * x0i - wk3r * x0r; - x0r = y1r - y3i; - x0i = y1i - y3r; - a[j3 - 2] = wd3i * x0r + wd3r * x0i; - a[j3 - 1] = wd3i * x0i - wd3r * x0r; - } - wk1r = csc1 * (wd1r + wn4r); - wk1i = csc1 * (wd1i + wn4r); - wk3r = csc3 * (wd3r - wn4r); - wk3i = csc3 * (wd3i - wn4r); - j0 = mh; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0 - 2] + a[j2 - 2]; - x0i = -a[j0 - 1] - a[j2 - 1]; - x1r = a[j0 - 2] - a[j2 - 2]; - x1i = -a[j0 - 1] + a[j2 - 1]; - x2r = a[j1 - 2] + a[j3 - 2]; - x2i = a[j1 - 1] + a[j3 - 1]; - x3r = a[j1 - 2] - a[j3 - 2]; - x3i = a[j1 - 1] - a[j3 - 1]; - a[j0 - 2] = x0r + x2r; - a[j0 - 1] = x0i - x2i; - a[j1 - 2] = x0r - x2r; - a[j1 - 1] = x0i + x2i; - x0r = x1r + x3i; - x0i = x1i + x3r; - a[j2 - 2] = wk1r * x0r - wk1i * x0i; - a[j2 - 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r - x3i; - x0i = x1i - x3r; - a[j3 - 2] = wk3r * x0r + wk3i * x0i; - a[j3 - 1] = wk3r * x0i - wk3i * x0r; - x0r = a[j0] + a[j2]; - x0i = -a[j0 + 1] - a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = -a[j0 + 1] + a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i - x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i + x2i; - x0r = x1r + x3i; - x0i = x1i + x3r; - a[j2] = wn4r * (x0r - x0i); - a[j2 + 1] = wn4r * (x0i + x0r); - x0r = x1r - x3i; - x0i = x1i - x3r; - a[j3] = -wn4r * (x0r + x0i); - a[j3 + 1] = -wn4r * (x0i - x0r); - x0r = a[j0 + 2] + a[j2 + 2]; - x0i = -a[j0 + 3] - a[j2 + 3]; - x1r = a[j0 + 2] - a[j2 + 2]; - x1i = -a[j0 + 3] + a[j2 + 3]; - x2r = a[j1 + 2] + a[j3 + 2]; - x2i = a[j1 + 3] + a[j3 + 3]; - x3r = a[j1 + 2] - a[j3 + 2]; - x3i = a[j1 + 3] - a[j3 + 3]; - a[j0 + 2] = x0r + x2r; - a[j0 + 3] = x0i - x2i; - a[j1 + 2] = x0r - x2r; - a[j1 + 3] = x0i + x2i; - x0r = x1r + x3i; - x0i = x1i + x3r; - a[j2 + 2] = wk1i * x0r - wk1r * x0i; - a[j2 + 3] = wk1i * x0i + wk1r * x0r; - x0r = x1r - x3i; - x0i = x1i - x3r; - a[j3 + 2] = wk3i * x0r + wk3r * x0i; - a[j3 + 3] = wk3i * x0i - wk3r * x0r; -} - - -#ifdef USE_CDFT_THREADS -struct cdft_arg_st { - int n0; - int n; - double *a; - int nw; - double *w; -}; -typedef struct cdft_arg_st cdft_arg_t; - - -void cftrec4_th(int n, double *a, int nw, double *w) -{ - void *cftrec1_th(void *p); - void *cftrec2_th(void *p); - int i, idiv4, m, nthread; - cdft_thread_t th[4]; - cdft_arg_t ag[4]; - - nthread = 2; - idiv4 = 0; - m = n >> 1; - if (n > CDFT_4THREADS_BEGIN_N) { - nthread = 4; - idiv4 = 1; - m >>= 1; - } - for (i = 0; i < nthread; i++) { - ag[i].n0 = n; - ag[i].n = m; - ag[i].a = &a[i * m]; - ag[i].nw = nw; - ag[i].w = w; - if (i != idiv4) { - cdft_thread_create(&th[i], cftrec1_th, &ag[i]); - } else { - cdft_thread_create(&th[i], cftrec2_th, &ag[i]); - } - } - for (i = 0; i < nthread; i++) { - cdft_thread_wait(th[i]); - } -} - - -void *cftrec1_th(void *p) -{ - int cfttree(int n, int j, int k, double *a, int nw, double *w); - void cftleaf(int n, int isplt, double *a, int nw, double *w); - void cftmdl1(int n, double *a, double *w); - int isplt, j, k, m, n, n0, nw; - double *a, *w; - - n0 = ((cdft_arg_t *) p)->n0; - n = ((cdft_arg_t *) p)->n; - a = ((cdft_arg_t *) p)->a; - nw = ((cdft_arg_t *) p)->nw; - w = ((cdft_arg_t *) p)->w; - m = n0; - while (m > 512) { - m >>= 2; - cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); - } - cftleaf(m, 1, &a[n - m], nw, w); - k = 0; - for (j = n - m; j > 0; j -= m) { - k++; - isplt = cfttree(m, j, k, a, nw, w); - cftleaf(m, isplt, &a[j - m], nw, w); - } - return (void *) 0; -} - - -void *cftrec2_th(void *p) -{ - int cfttree(int n, int j, int k, double *a, int nw, double *w); - void cftleaf(int n, int isplt, double *a, int nw, double *w); - void cftmdl2(int n, double *a, double *w); - int isplt, j, k, m, n, n0, nw; - double *a, *w; - - n0 = ((cdft_arg_t *) p)->n0; - n = ((cdft_arg_t *) p)->n; - a = ((cdft_arg_t *) p)->a; - nw = ((cdft_arg_t *) p)->nw; - w = ((cdft_arg_t *) p)->w; - k = 1; - m = n0; - while (m > 512) { - m >>= 2; - k <<= 2; - cftmdl2(m, &a[n - m], &w[nw - m]); - } - cftleaf(m, 0, &a[n - m], nw, w); - k >>= 1; - for (j = n - m; j > 0; j -= m) { - k++; - isplt = cfttree(m, j, k, a, nw, w); - cftleaf(m, isplt, &a[j - m], nw, w); - } - return (void *) 0; -} -#endif /* USE_CDFT_THREADS */ - - -void cftrec4(int n, double *a, int nw, double *w) -{ - int cfttree(int n, int j, int k, double *a, int nw, double *w); - void cftleaf(int n, int isplt, double *a, int nw, double *w); - void cftmdl1(int n, double *a, double *w); - int isplt, j, k, m; - - m = n; - while (m > 512) { - m >>= 2; - cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); - } - cftleaf(m, 1, &a[n - m], nw, w); - k = 0; - for (j = n - m; j > 0; j -= m) { - k++; - isplt = cfttree(m, j, k, a, nw, w); - cftleaf(m, isplt, &a[j - m], nw, w); - } -} - - -int cfttree(int n, int j, int k, double *a, int nw, double *w) -{ - void cftmdl1(int n, double *a, double *w); - void cftmdl2(int n, double *a, double *w); - int i, isplt, m; - - if ((k & 3) != 0) { - isplt = k & 1; - if (isplt != 0) { - cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); - } else { - cftmdl2(n, &a[j - n], &w[nw - n]); - } - } else { - m = n; - for (i = k; (i & 3) == 0; i >>= 2) { - m <<= 2; - } - isplt = i & 1; - if (isplt != 0) { - while (m > 128) { - cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); - m >>= 2; - } - } else { - while (m > 128) { - cftmdl2(m, &a[j - m], &w[nw - m]); - m >>= 2; - } - } - } - return isplt; -} - - -void cftleaf(int n, int isplt, double *a, int nw, double *w) -{ - void cftmdl1(int n, double *a, double *w); - void cftmdl2(int n, double *a, double *w); - void cftf161(double *a, double *w); - void cftf162(double *a, double *w); - void cftf081(double *a, double *w); - void cftf082(double *a, double *w); - - if (n == 512) { - cftmdl1(128, a, &w[nw - 64]); - cftf161(a, &w[nw - 8]); - cftf162(&a[32], &w[nw - 32]); - cftf161(&a[64], &w[nw - 8]); - cftf161(&a[96], &w[nw - 8]); - cftmdl2(128, &a[128], &w[nw - 128]); - cftf161(&a[128], &w[nw - 8]); - cftf162(&a[160], &w[nw - 32]); - cftf161(&a[192], &w[nw - 8]); - cftf162(&a[224], &w[nw - 32]); - cftmdl1(128, &a[256], &w[nw - 64]); - cftf161(&a[256], &w[nw - 8]); - cftf162(&a[288], &w[nw - 32]); - cftf161(&a[320], &w[nw - 8]); - cftf161(&a[352], &w[nw - 8]); - if (isplt != 0) { - cftmdl1(128, &a[384], &w[nw - 64]); - cftf161(&a[480], &w[nw - 8]); - } else { - cftmdl2(128, &a[384], &w[nw - 128]); - cftf162(&a[480], &w[nw - 32]); - } - cftf161(&a[384], &w[nw - 8]); - cftf162(&a[416], &w[nw - 32]); - cftf161(&a[448], &w[nw - 8]); - } else { - cftmdl1(64, a, &w[nw - 32]); - cftf081(a, &w[nw - 8]); - cftf082(&a[16], &w[nw - 8]); - cftf081(&a[32], &w[nw - 8]); - cftf081(&a[48], &w[nw - 8]); - cftmdl2(64, &a[64], &w[nw - 64]); - cftf081(&a[64], &w[nw - 8]); - cftf082(&a[80], &w[nw - 8]); - cftf081(&a[96], &w[nw - 8]); - cftf082(&a[112], &w[nw - 8]); - cftmdl1(64, &a[128], &w[nw - 32]); - cftf081(&a[128], &w[nw - 8]); - cftf082(&a[144], &w[nw - 8]); - cftf081(&a[160], &w[nw - 8]); - cftf081(&a[176], &w[nw - 8]); - if (isplt != 0) { - cftmdl1(64, &a[192], &w[nw - 32]); - cftf081(&a[240], &w[nw - 8]); - } else { - cftmdl2(64, &a[192], &w[nw - 64]); - cftf082(&a[240], &w[nw - 8]); - } - cftf081(&a[192], &w[nw - 8]); - cftf082(&a[208], &w[nw - 8]); - cftf081(&a[224], &w[nw - 8]); - } -} - - -void cftmdl1(int n, double *a, double *w) -{ - int j, j0, j1, j2, j3, k, m, mh; - double wn4r, wk1r, wk1i, wk3r, wk3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - mh = n >> 3; - m = 2 * mh; - j1 = m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[0] + a[j2]; - x0i = a[1] + a[j2 + 1]; - x1r = a[0] - a[j2]; - x1i = a[1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - a[j2] = x1r - x3i; - a[j2 + 1] = x1i + x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i - x3r; - wn4r = w[1]; - k = 0; - for (j = 2; j < mh; j += 2) { - k += 4; - wk1r = w[k]; - wk1i = w[k + 1]; - wk3r = w[k + 2]; - wk3i = w[k + 3]; - j1 = j + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j] + a[j2]; - x0i = a[j + 1] + a[j2 + 1]; - x1r = a[j] - a[j2]; - x1i = a[j + 1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wk1r * x0r - wk1i * x0i; - a[j2 + 1] = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3r * x0r + wk3i * x0i; - a[j3 + 1] = wk3r * x0i - wk3i * x0r; - j0 = m - j; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] + a[j2]; - x0i = a[j0 + 1] + a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = a[j0 + 1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wk1i * x0r - wk1r * x0i; - a[j2 + 1] = wk1i * x0i + wk1r * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = wk3i * x0r + wk3r * x0i; - a[j3 + 1] = wk3i * x0i - wk3r * x0r; - } - j0 = mh; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] + a[j2]; - x0i = a[j0 + 1] + a[j2 + 1]; - x1r = a[j0] - a[j2]; - x1i = a[j0 + 1] - a[j2 + 1]; - x2r = a[j1] + a[j3]; - x2i = a[j1 + 1] + a[j3 + 1]; - x3r = a[j1] - a[j3]; - x3i = a[j1 + 1] - a[j3 + 1]; - a[j0] = x0r + x2r; - a[j0 + 1] = x0i + x2i; - a[j1] = x0r - x2r; - a[j1 + 1] = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - a[j2] = wn4r * (x0r - x0i); - a[j2 + 1] = wn4r * (x0i + x0r); - x0r = x1r + x3i; - x0i = x1i - x3r; - a[j3] = -wn4r * (x0r + x0i); - a[j3 + 1] = -wn4r * (x0i - x0r); -} - - -void cftmdl2(int n, double *a, double *w) -{ - int j, j0, j1, j2, j3, k, kr, m, mh; - double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; - - mh = n >> 3; - m = 2 * mh; - wn4r = w[1]; - j1 = m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[0] - a[j2 + 1]; - x0i = a[1] + a[j2]; - x1r = a[0] + a[j2 + 1]; - x1i = a[1] - a[j2]; - x2r = a[j1] - a[j3 + 1]; - x2i = a[j1 + 1] + a[j3]; - x3r = a[j1] + a[j3 + 1]; - x3i = a[j1 + 1] - a[j3]; - y0r = wn4r * (x2r - x2i); - y0i = wn4r * (x2i + x2r); - a[0] = x0r + y0r; - a[1] = x0i + y0i; - a[j1] = x0r - y0r; - a[j1 + 1] = x0i - y0i; - y0r = wn4r * (x3r - x3i); - y0i = wn4r * (x3i + x3r); - a[j2] = x1r - y0i; - a[j2 + 1] = x1i + y0r; - a[j3] = x1r + y0i; - a[j3 + 1] = x1i - y0r; - k = 0; - kr = 2 * m; - for (j = 2; j < mh; j += 2) { - k += 4; - wk1r = w[k]; - wk1i = w[k + 1]; - wk3r = w[k + 2]; - wk3i = w[k + 3]; - kr -= 4; - wd1i = w[kr]; - wd1r = w[kr + 1]; - wd3i = w[kr + 2]; - wd3r = w[kr + 3]; - j1 = j + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j] - a[j2 + 1]; - x0i = a[j + 1] + a[j2]; - x1r = a[j] + a[j2 + 1]; - x1i = a[j + 1] - a[j2]; - x2r = a[j1] - a[j3 + 1]; - x2i = a[j1 + 1] + a[j3]; - x3r = a[j1] + a[j3 + 1]; - x3i = a[j1 + 1] - a[j3]; - y0r = wk1r * x0r - wk1i * x0i; - y0i = wk1r * x0i + wk1i * x0r; - y2r = wd1r * x2r - wd1i * x2i; - y2i = wd1r * x2i + wd1i * x2r; - a[j] = y0r + y2r; - a[j + 1] = y0i + y2i; - a[j1] = y0r - y2r; - a[j1 + 1] = y0i - y2i; - y0r = wk3r * x1r + wk3i * x1i; - y0i = wk3r * x1i - wk3i * x1r; - y2r = wd3r * x3r + wd3i * x3i; - y2i = wd3r * x3i - wd3i * x3r; - a[j2] = y0r + y2r; - a[j2 + 1] = y0i + y2i; - a[j3] = y0r - y2r; - a[j3 + 1] = y0i - y2i; - j0 = m - j; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] - a[j2 + 1]; - x0i = a[j0 + 1] + a[j2]; - x1r = a[j0] + a[j2 + 1]; - x1i = a[j0 + 1] - a[j2]; - x2r = a[j1] - a[j3 + 1]; - x2i = a[j1 + 1] + a[j3]; - x3r = a[j1] + a[j3 + 1]; - x3i = a[j1 + 1] - a[j3]; - y0r = wd1i * x0r - wd1r * x0i; - y0i = wd1i * x0i + wd1r * x0r; - y2r = wk1i * x2r - wk1r * x2i; - y2i = wk1i * x2i + wk1r * x2r; - a[j0] = y0r + y2r; - a[j0 + 1] = y0i + y2i; - a[j1] = y0r - y2r; - a[j1 + 1] = y0i - y2i; - y0r = wd3i * x1r + wd3r * x1i; - y0i = wd3i * x1i - wd3r * x1r; - y2r = wk3i * x3r + wk3r * x3i; - y2i = wk3i * x3i - wk3r * x3r; - a[j2] = y0r + y2r; - a[j2 + 1] = y0i + y2i; - a[j3] = y0r - y2r; - a[j3 + 1] = y0i - y2i; - } - wk1r = w[m]; - wk1i = w[m + 1]; - j0 = mh; - j1 = j0 + m; - j2 = j1 + m; - j3 = j2 + m; - x0r = a[j0] - a[j2 + 1]; - x0i = a[j0 + 1] + a[j2]; - x1r = a[j0] + a[j2 + 1]; - x1i = a[j0 + 1] - a[j2]; - x2r = a[j1] - a[j3 + 1]; - x2i = a[j1 + 1] + a[j3]; - x3r = a[j1] + a[j3 + 1]; - x3i = a[j1 + 1] - a[j3]; - y0r = wk1r * x0r - wk1i * x0i; - y0i = wk1r * x0i + wk1i * x0r; - y2r = wk1i * x2r - wk1r * x2i; - y2i = wk1i * x2i + wk1r * x2r; - a[j0] = y0r + y2r; - a[j0 + 1] = y0i + y2i; - a[j1] = y0r - y2r; - a[j1 + 1] = y0i - y2i; - y0r = wk1i * x1r - wk1r * x1i; - y0i = wk1i * x1i + wk1r * x1r; - y2r = wk1r * x3r - wk1i * x3i; - y2i = wk1r * x3i + wk1i * x3r; - a[j2] = y0r - y2r; - a[j2 + 1] = y0i - y2i; - a[j3] = y0r + y2r; - a[j3 + 1] = y0i + y2i; -} - - -void cftfx41(int n, double *a, int nw, double *w) -{ - void cftf161(double *a, double *w); - void cftf162(double *a, double *w); - void cftf081(double *a, double *w); - void cftf082(double *a, double *w); - - if (n == 128) { - cftf161(a, &w[nw - 8]); - cftf162(&a[32], &w[nw - 32]); - cftf161(&a[64], &w[nw - 8]); - cftf161(&a[96], &w[nw - 8]); - } else { - cftf081(a, &w[nw - 8]); - cftf082(&a[16], &w[nw - 8]); - cftf081(&a[32], &w[nw - 8]); - cftf081(&a[48], &w[nw - 8]); - } -} - - -void cftf161(double *a, double *w) -{ - double wn4r, wk1r, wk1i, - x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, - y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, - y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, - y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; - - wn4r = w[1]; - wk1r = w[2]; - wk1i = w[3]; - x0r = a[0] + a[16]; - x0i = a[1] + a[17]; - x1r = a[0] - a[16]; - x1i = a[1] - a[17]; - x2r = a[8] + a[24]; - x2i = a[9] + a[25]; - x3r = a[8] - a[24]; - x3i = a[9] - a[25]; - y0r = x0r + x2r; - y0i = x0i + x2i; - y4r = x0r - x2r; - y4i = x0i - x2i; - y8r = x1r - x3i; - y8i = x1i + x3r; - y12r = x1r + x3i; - y12i = x1i - x3r; - x0r = a[2] + a[18]; - x0i = a[3] + a[19]; - x1r = a[2] - a[18]; - x1i = a[3] - a[19]; - x2r = a[10] + a[26]; - x2i = a[11] + a[27]; - x3r = a[10] - a[26]; - x3i = a[11] - a[27]; - y1r = x0r + x2r; - y1i = x0i + x2i; - y5r = x0r - x2r; - y5i = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - y9r = wk1r * x0r - wk1i * x0i; - y9i = wk1r * x0i + wk1i * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - y13r = wk1i * x0r - wk1r * x0i; - y13i = wk1i * x0i + wk1r * x0r; - x0r = a[4] + a[20]; - x0i = a[5] + a[21]; - x1r = a[4] - a[20]; - x1i = a[5] - a[21]; - x2r = a[12] + a[28]; - x2i = a[13] + a[29]; - x3r = a[12] - a[28]; - x3i = a[13] - a[29]; - y2r = x0r + x2r; - y2i = x0i + x2i; - y6r = x0r - x2r; - y6i = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - y10r = wn4r * (x0r - x0i); - y10i = wn4r * (x0i + x0r); - x0r = x1r + x3i; - x0i = x1i - x3r; - y14r = wn4r * (x0r + x0i); - y14i = wn4r * (x0i - x0r); - x0r = a[6] + a[22]; - x0i = a[7] + a[23]; - x1r = a[6] - a[22]; - x1i = a[7] - a[23]; - x2r = a[14] + a[30]; - x2i = a[15] + a[31]; - x3r = a[14] - a[30]; - x3i = a[15] - a[31]; - y3r = x0r + x2r; - y3i = x0i + x2i; - y7r = x0r - x2r; - y7i = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - y11r = wk1i * x0r - wk1r * x0i; - y11i = wk1i * x0i + wk1r * x0r; - x0r = x1r + x3i; - x0i = x1i - x3r; - y15r = wk1r * x0r - wk1i * x0i; - y15i = wk1r * x0i + wk1i * x0r; - x0r = y12r - y14r; - x0i = y12i - y14i; - x1r = y12r + y14r; - x1i = y12i + y14i; - x2r = y13r - y15r; - x2i = y13i - y15i; - x3r = y13r + y15r; - x3i = y13i + y15i; - a[24] = x0r + x2r; - a[25] = x0i + x2i; - a[26] = x0r - x2r; - a[27] = x0i - x2i; - a[28] = x1r - x3i; - a[29] = x1i + x3r; - a[30] = x1r + x3i; - a[31] = x1i - x3r; - x0r = y8r + y10r; - x0i = y8i + y10i; - x1r = y8r - y10r; - x1i = y8i - y10i; - x2r = y9r + y11r; - x2i = y9i + y11i; - x3r = y9r - y11r; - x3i = y9i - y11i; - a[16] = x0r + x2r; - a[17] = x0i + x2i; - a[18] = x0r - x2r; - a[19] = x0i - x2i; - a[20] = x1r - x3i; - a[21] = x1i + x3r; - a[22] = x1r + x3i; - a[23] = x1i - x3r; - x0r = y5r - y7i; - x0i = y5i + y7r; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - x0r = y5r + y7i; - x0i = y5i - y7r; - x3r = wn4r * (x0r - x0i); - x3i = wn4r * (x0i + x0r); - x0r = y4r - y6i; - x0i = y4i + y6r; - x1r = y4r + y6i; - x1i = y4i - y6r; - a[8] = x0r + x2r; - a[9] = x0i + x2i; - a[10] = x0r - x2r; - a[11] = x0i - x2i; - a[12] = x1r - x3i; - a[13] = x1i + x3r; - a[14] = x1r + x3i; - a[15] = x1i - x3r; - x0r = y0r + y2r; - x0i = y0i + y2i; - x1r = y0r - y2r; - x1i = y0i - y2i; - x2r = y1r + y3r; - x2i = y1i + y3i; - x3r = y1r - y3r; - x3i = y1i - y3i; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[2] = x0r - x2r; - a[3] = x0i - x2i; - a[4] = x1r - x3i; - a[5] = x1i + x3r; - a[6] = x1r + x3i; - a[7] = x1i - x3r; -} - - -void cftf162(double *a, double *w) -{ - double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, - x0r, x0i, x1r, x1i, x2r, x2i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, - y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, - y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, - y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; - - wn4r = w[1]; - wk1r = w[4]; - wk1i = w[5]; - wk3r = w[6]; - wk3i = -w[7]; - wk2r = w[8]; - wk2i = w[9]; - x1r = a[0] - a[17]; - x1i = a[1] + a[16]; - x0r = a[8] - a[25]; - x0i = a[9] + a[24]; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - y0r = x1r + x2r; - y0i = x1i + x2i; - y4r = x1r - x2r; - y4i = x1i - x2i; - x1r = a[0] + a[17]; - x1i = a[1] - a[16]; - x0r = a[8] + a[25]; - x0i = a[9] - a[24]; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - y8r = x1r - x2i; - y8i = x1i + x2r; - y12r = x1r + x2i; - y12i = x1i - x2r; - x0r = a[2] - a[19]; - x0i = a[3] + a[18]; - x1r = wk1r * x0r - wk1i * x0i; - x1i = wk1r * x0i + wk1i * x0r; - x0r = a[10] - a[27]; - x0i = a[11] + a[26]; - x2r = wk3i * x0r - wk3r * x0i; - x2i = wk3i * x0i + wk3r * x0r; - y1r = x1r + x2r; - y1i = x1i + x2i; - y5r = x1r - x2r; - y5i = x1i - x2i; - x0r = a[2] + a[19]; - x0i = a[3] - a[18]; - x1r = wk3r * x0r - wk3i * x0i; - x1i = wk3r * x0i + wk3i * x0r; - x0r = a[10] + a[27]; - x0i = a[11] - a[26]; - x2r = wk1r * x0r + wk1i * x0i; - x2i = wk1r * x0i - wk1i * x0r; - y9r = x1r - x2r; - y9i = x1i - x2i; - y13r = x1r + x2r; - y13i = x1i + x2i; - x0r = a[4] - a[21]; - x0i = a[5] + a[20]; - x1r = wk2r * x0r - wk2i * x0i; - x1i = wk2r * x0i + wk2i * x0r; - x0r = a[12] - a[29]; - x0i = a[13] + a[28]; - x2r = wk2i * x0r - wk2r * x0i; - x2i = wk2i * x0i + wk2r * x0r; - y2r = x1r + x2r; - y2i = x1i + x2i; - y6r = x1r - x2r; - y6i = x1i - x2i; - x0r = a[4] + a[21]; - x0i = a[5] - a[20]; - x1r = wk2i * x0r - wk2r * x0i; - x1i = wk2i * x0i + wk2r * x0r; - x0r = a[12] + a[29]; - x0i = a[13] - a[28]; - x2r = wk2r * x0r - wk2i * x0i; - x2i = wk2r * x0i + wk2i * x0r; - y10r = x1r - x2r; - y10i = x1i - x2i; - y14r = x1r + x2r; - y14i = x1i + x2i; - x0r = a[6] - a[23]; - x0i = a[7] + a[22]; - x1r = wk3r * x0r - wk3i * x0i; - x1i = wk3r * x0i + wk3i * x0r; - x0r = a[14] - a[31]; - x0i = a[15] + a[30]; - x2r = wk1i * x0r - wk1r * x0i; - x2i = wk1i * x0i + wk1r * x0r; - y3r = x1r + x2r; - y3i = x1i + x2i; - y7r = x1r - x2r; - y7i = x1i - x2i; - x0r = a[6] + a[23]; - x0i = a[7] - a[22]; - x1r = wk1i * x0r + wk1r * x0i; - x1i = wk1i * x0i - wk1r * x0r; - x0r = a[14] + a[31]; - x0i = a[15] - a[30]; - x2r = wk3i * x0r - wk3r * x0i; - x2i = wk3i * x0i + wk3r * x0r; - y11r = x1r + x2r; - y11i = x1i + x2i; - y15r = x1r - x2r; - y15i = x1i - x2i; - x1r = y0r + y2r; - x1i = y0i + y2i; - x2r = y1r + y3r; - x2i = y1i + y3i; - a[0] = x1r + x2r; - a[1] = x1i + x2i; - a[2] = x1r - x2r; - a[3] = x1i - x2i; - x1r = y0r - y2r; - x1i = y0i - y2i; - x2r = y1r - y3r; - x2i = y1i - y3i; - a[4] = x1r - x2i; - a[5] = x1i + x2r; - a[6] = x1r + x2i; - a[7] = x1i - x2r; - x1r = y4r - y6i; - x1i = y4i + y6r; - x0r = y5r - y7i; - x0i = y5i + y7r; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - a[8] = x1r + x2r; - a[9] = x1i + x2i; - a[10] = x1r - x2r; - a[11] = x1i - x2i; - x1r = y4r + y6i; - x1i = y4i - y6r; - x0r = y5r + y7i; - x0i = y5i - y7r; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - a[12] = x1r - x2i; - a[13] = x1i + x2r; - a[14] = x1r + x2i; - a[15] = x1i - x2r; - x1r = y8r + y10r; - x1i = y8i + y10i; - x2r = y9r - y11r; - x2i = y9i - y11i; - a[16] = x1r + x2r; - a[17] = x1i + x2i; - a[18] = x1r - x2r; - a[19] = x1i - x2i; - x1r = y8r - y10r; - x1i = y8i - y10i; - x2r = y9r + y11r; - x2i = y9i + y11i; - a[20] = x1r - x2i; - a[21] = x1i + x2r; - a[22] = x1r + x2i; - a[23] = x1i - x2r; - x1r = y12r - y14i; - x1i = y12i + y14r; - x0r = y13r + y15i; - x0i = y13i - y15r; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - a[24] = x1r + x2r; - a[25] = x1i + x2i; - a[26] = x1r - x2r; - a[27] = x1i - x2i; - x1r = y12r + y14i; - x1i = y12i - y14r; - x0r = y13r - y15i; - x0i = y13i + y15r; - x2r = wn4r * (x0r - x0i); - x2i = wn4r * (x0i + x0r); - a[28] = x1r - x2i; - a[29] = x1i + x2r; - a[30] = x1r + x2i; - a[31] = x1i - x2r; -} - - -void cftf081(double *a, double *w) -{ - double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, - y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; - - wn4r = w[1]; - x0r = a[0] + a[8]; - x0i = a[1] + a[9]; - x1r = a[0] - a[8]; - x1i = a[1] - a[9]; - x2r = a[4] + a[12]; - x2i = a[5] + a[13]; - x3r = a[4] - a[12]; - x3i = a[5] - a[13]; - y0r = x0r + x2r; - y0i = x0i + x2i; - y2r = x0r - x2r; - y2i = x0i - x2i; - y1r = x1r - x3i; - y1i = x1i + x3r; - y3r = x1r + x3i; - y3i = x1i - x3r; - x0r = a[2] + a[10]; - x0i = a[3] + a[11]; - x1r = a[2] - a[10]; - x1i = a[3] - a[11]; - x2r = a[6] + a[14]; - x2i = a[7] + a[15]; - x3r = a[6] - a[14]; - x3i = a[7] - a[15]; - y4r = x0r + x2r; - y4i = x0i + x2i; - y6r = x0r - x2r; - y6i = x0i - x2i; - x0r = x1r - x3i; - x0i = x1i + x3r; - x2r = x1r + x3i; - x2i = x1i - x3r; - y5r = wn4r * (x0r - x0i); - y5i = wn4r * (x0r + x0i); - y7r = wn4r * (x2r - x2i); - y7i = wn4r * (x2r + x2i); - a[8] = y1r + y5r; - a[9] = y1i + y5i; - a[10] = y1r - y5r; - a[11] = y1i - y5i; - a[12] = y3r - y7i; - a[13] = y3i + y7r; - a[14] = y3r + y7i; - a[15] = y3i - y7r; - a[0] = y0r + y4r; - a[1] = y0i + y4i; - a[2] = y0r - y4r; - a[3] = y0i - y4i; - a[4] = y2r - y6i; - a[5] = y2i + y6r; - a[6] = y2r + y6i; - a[7] = y2i - y6r; -} - - -void cftf082(double *a, double *w) -{ - double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, - y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, - y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; - - wn4r = w[1]; - wk1r = w[2]; - wk1i = w[3]; - y0r = a[0] - a[9]; - y0i = a[1] + a[8]; - y1r = a[0] + a[9]; - y1i = a[1] - a[8]; - x0r = a[4] - a[13]; - x0i = a[5] + a[12]; - y2r = wn4r * (x0r - x0i); - y2i = wn4r * (x0i + x0r); - x0r = a[4] + a[13]; - x0i = a[5] - a[12]; - y3r = wn4r * (x0r - x0i); - y3i = wn4r * (x0i + x0r); - x0r = a[2] - a[11]; - x0i = a[3] + a[10]; - y4r = wk1r * x0r - wk1i * x0i; - y4i = wk1r * x0i + wk1i * x0r; - x0r = a[2] + a[11]; - x0i = a[3] - a[10]; - y5r = wk1i * x0r - wk1r * x0i; - y5i = wk1i * x0i + wk1r * x0r; - x0r = a[6] - a[15]; - x0i = a[7] + a[14]; - y6r = wk1i * x0r - wk1r * x0i; - y6i = wk1i * x0i + wk1r * x0r; - x0r = a[6] + a[15]; - x0i = a[7] - a[14]; - y7r = wk1r * x0r - wk1i * x0i; - y7i = wk1r * x0i + wk1i * x0r; - x0r = y0r + y2r; - x0i = y0i + y2i; - x1r = y4r + y6r; - x1i = y4i + y6i; - a[0] = x0r + x1r; - a[1] = x0i + x1i; - a[2] = x0r - x1r; - a[3] = x0i - x1i; - x0r = y0r - y2r; - x0i = y0i - y2i; - x1r = y4r - y6r; - x1i = y4i - y6i; - a[4] = x0r - x1i; - a[5] = x0i + x1r; - a[6] = x0r + x1i; - a[7] = x0i - x1r; - x0r = y1r - y3i; - x0i = y1i + y3r; - x1r = y5r - y7r; - x1i = y5i - y7i; - a[8] = x0r + x1r; - a[9] = x0i + x1i; - a[10] = x0r - x1r; - a[11] = x0i - x1i; - x0r = y1r + y3i; - x0i = y1i - y3r; - x1r = y5r + y7r; - x1i = y5i + y7i; - a[12] = x0r - x1i; - a[13] = x0i + x1r; - a[14] = x0r + x1i; - a[15] = x0i - x1r; -} - - -void cftf040(double *a) -{ - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - x0r = a[0] + a[4]; - x0i = a[1] + a[5]; - x1r = a[0] - a[4]; - x1i = a[1] - a[5]; - x2r = a[2] + a[6]; - x2i = a[3] + a[7]; - x3r = a[2] - a[6]; - x3i = a[3] - a[7]; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[2] = x1r - x3i; - a[3] = x1i + x3r; - a[4] = x0r - x2r; - a[5] = x0i - x2i; - a[6] = x1r + x3i; - a[7] = x1i - x3r; -} - - -void cftb040(double *a) -{ - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - x0r = a[0] + a[4]; - x0i = a[1] + a[5]; - x1r = a[0] - a[4]; - x1i = a[1] - a[5]; - x2r = a[2] + a[6]; - x2i = a[3] + a[7]; - x3r = a[2] - a[6]; - x3i = a[3] - a[7]; - a[0] = x0r + x2r; - a[1] = x0i + x2i; - a[2] = x1r + x3i; - a[3] = x1i - x3r; - a[4] = x0r - x2r; - a[5] = x0i - x2i; - a[6] = x1r - x3i; - a[7] = x1i + x3r; -} - - -void cftx020(double *a) -{ - double x0r, x0i; - - x0r = a[0] - a[2]; - x0i = a[1] - a[3]; - a[0] += a[2]; - a[1] += a[3]; - a[2] = x0r; - a[3] = x0i; -} - - -void rftfsub(int n, double *a, int nc, double *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr, xi, yr, yi; - - m = n >> 1; - ks = 2 * nc / m; - kk = 0; - for (j = 2; j < m; j += 2) { - k = n - j; - kk += ks; - wkr = 0.5 - c[nc - kk]; - wki = c[kk]; - xr = a[j] - a[k]; - xi = a[j + 1] + a[k + 1]; - yr = wkr * xr - wki * xi; - yi = wkr * xi + wki * xr; - a[j] -= yr; - a[j + 1] -= yi; - a[k] += yr; - a[k + 1] -= yi; - } -} - - -void rftbsub(int n, double *a, int nc, double *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr, xi, yr, yi; - - m = n >> 1; - ks = 2 * nc / m; - kk = 0; - for (j = 2; j < m; j += 2) { - k = n - j; - kk += ks; - wkr = 0.5 - c[nc - kk]; - wki = c[kk]; - xr = a[j] - a[k]; - xi = a[j + 1] + a[k + 1]; - yr = wkr * xr + wki * xi; - yi = wkr * xi - wki * xr; - a[j] -= yr; - a[j + 1] -= yi; - a[k] += yr; - a[k + 1] -= yi; - } -} - - -void dctsub(int n, double *a, int nc, double *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr; - - m = n >> 1; - ks = nc / n; - kk = 0; - for (j = 1; j < m; j++) { - k = n - j; - kk += ks; - wkr = c[kk] - c[nc - kk]; - wki = c[kk] + c[nc - kk]; - xr = wki * a[j] - wkr * a[k]; - a[j] = wkr * a[j] + wki * a[k]; - a[k] = xr; - } - a[m] *= c[0]; -} - - -void dstsub(int n, double *a, int nc, double *c) -{ - int j, k, kk, ks, m; - double wkr, wki, xr; - - m = n >> 1; - ks = nc / n; - kk = 0; - for (j = 1; j < m; j++) { - k = n - j; - kk += ks; - wkr = c[kk] - c[nc - kk]; - wki = c[kk] + c[nc - kk]; - xr = wki * a[k] - wkr * a[j]; - a[k] = wkr * a[k] + wki * a[j]; - a[j] = xr; - } - a[m] *= c[0]; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg.f b/third_party/tensorflow_dependencies/fft2d/fftsg.f deleted file mode 100644 index 6fb26c4..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg.f +++ /dev/null
@@ -1,2967 +0,0 @@ -! Fast Fourier/Cosine/Sine Transform -! dimension :one -! data length :power of 2 -! decimation :frequency -! radix :split-radix -! data :inplace -! table :use -! subroutines -! cdft: Complex Discrete Fourier Transform -! rdft: Real Discrete Fourier Transform -! ddct: Discrete Cosine Transform -! ddst: Discrete Sine Transform -! dfct: Cosine Transform of RDFT (Real Symmetric DFT) -! dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) -! -! -! -------- Complex DFT (Discrete Fourier Transform) -------- -! [definition] -! <case1> -! X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k<n -! <case2> -! X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k<n -! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call cdft(2*n, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call cdft(2*n, -1, a, ip, w) -! [parameters] -! 2*n :data length (integer) -! n >= 1, n = power of 2 -! a(0:2*n-1) :input/output data (real*8) -! input data -! a(2*j) = Re(x(j)), -! a(2*j+1) = Im(x(j)), 0<=j<n -! output data -! a(2*k) = Re(X(k)), -! a(2*k+1) = Im(X(k)), 0<=k<n -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! strictly, -! length of ip >= -! 2+2**(int(log(n+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n/2-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call cdft(2*n, -1, a, ip, w) -! is -! call cdft(2*n, 1, a, ip, w) -! do j = 0, 2 * n - 1 -! a(j) = a(j) / n -! end do -! . -! -! -! -------- Real DFT / Inverse of Real DFT -------- -! [definition] -! <case1> RDFT -! R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2 -! I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0<k<n/2 -! <case2> IRDFT (excluding scale) -! a(k) = (R(0) + R(n/2)*cos(pi*k))/2 + -! sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) + -! sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k<n -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call rdft(n, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call rdft(n, -1, a, ip, w) -! [parameters] -! n :data length (integer) -! n >= 2, n = power of 2 -! a(0:n-1) :input/output data (real*8) -! <case1> -! output data -! a(2*k) = R(k), 0<=k<n/2 -! a(2*k+1) = I(k), 0<k<n/2 -! a(1) = R(n/2) -! <case2> -! input data -! a(2*j) = R(j), 0<=j<n/2 -! a(2*j+1) = I(j), 0<j<n/2 -! a(1) = R(n/2) -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n/2) -! strictly, -! length of ip >= -! 2+2**(int(log(n/2+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n/2-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call rdft(n, 1, a, ip, w) -! is -! call rdft(n, -1, a, ip, w) -! do j = 0, n - 1 -! a(j) = a(j) * 2 / n -! end do -! . -! -! -! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- -! [definition] -! <case1> IDCT (excluding scale) -! C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k<n -! <case2> DCT -! C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k<n -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddct(n, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddct(n, -1, a, ip, w) -! [parameters] -! n :data length (integer) -! n >= 2, n = power of 2 -! a(0:n-1) :input/output data (real*8) -! output data -! a(k) = C(k), 0<=k<n -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n/2) -! strictly, -! length of ip >= -! 2+2**(int(log(n/2+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n*5/4-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddct(n, -1, a, ip, w) -! is -! a(0) = a(0) / 2 -! call ddct(n, 1, a, ip, w) -! do j = 0, n - 1 -! a(j) = a(j) * 2 / n -! end do -! . -! -! -! -------- DST (Discrete Sine Transform) / Inverse of DST -------- -! [definition] -! <case1> IDST (excluding scale) -! S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k<n -! <case2> DST -! S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0<k<=n -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddst(n, 1, a, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddst(n, -1, a, ip, w) -! [parameters] -! n :data length (integer) -! n >= 2, n = power of 2 -! a(0:n-1) :input/output data (real*8) -! <case1> -! input data -! a(j) = A(j), 0<j<n -! a(0) = A(n) -! output data -! a(k) = S(k), 0<=k<n -! <case2> -! output data -! a(k) = S(k), 0<k<n -! a(0) = S(n) -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n/2) -! strictly, -! length of ip >= -! 2+2**(int(log(n/2+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n*5/4-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddst(n, -1, a, ip, w) -! is -! a(0) = a(0) / 2 -! call ddst(n, 1, a, ip, w) -! do j = 0, n - 1 -! a(j) = a(j) * 2 / n -! end do -! . -! -! -! -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- -! [definition] -! C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n -! [usage] -! ip(0) = 0 ! first time only -! call dfct(n, a, t, ip, w) -! [parameters] -! n :data length - 1 (integer) -! n >= 2, n = power of 2 -! a(0:n) :input/output data (real*8) -! output data -! a(k) = C(k), 0<=k<=n -! t(0:n/2) :work area (real*8) -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n/4) -! strictly, -! length of ip >= -! 2+2**(int(log(n/4+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n*5/8-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! a(0) = a(0) / 2 -! a(n) = a(n) / 2 -! call dfct(n, a, t, ip, w) -! is -! a(0) = a(0) / 2 -! a(n) = a(n) / 2 -! call dfct(n, a, t, ip, w) -! do j = 0, n -! a(j) = a(j) * 2 / n -! end do -! . -! -! -! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- -! [definition] -! S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0<k<n -! [usage] -! ip(0) = 0 ! first time only -! call dfst(n, a, t, ip, w) -! [parameters] -! n :data length + 1 (integer) -! n >= 2, n = power of 2 -! a(0:n-1) :input/output data (real*8) -! output data -! a(k) = S(k), 0<k<n -! (a(0) is used for work area) -! t(0:n/2-1) :work area (real*8) -! ip(0:*) :work area for bit reversal (integer) -! length of ip >= 2+sqrt(n/4) -! strictly, -! length of ip >= -! 2+2**(int(log(n/4+0.5)/log(2.0))/2). -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:n*5/8-1) :cos/sin table (real*8) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call dfst(n, a, t, ip, w) -! is -! call dfst(n, a, t, ip, w) -! do j = 1, n - 1 -! a(j) = a(j) * 2 / n -! end do -! . -! -! -! Appendix : -! The cos/sin table is recalculated when the larger table required. -! w() and ip() are compatible with all routines. -! -! - subroutine cdft(n, isgn, a, ip, w) - integer n, isgn, ip(0 : *), nw - real*8 a(0 : n - 1), w(0 : *) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - if (isgn .ge. 0) then - call cftfsub(n, a, ip, nw, w) - else - call cftbsub(n, a, ip, nw, w) - end if - end -! - subroutine rdft(n, isgn, a, ip, w) - integer n, isgn, ip(0 : *), nw, nc - real*8 a(0 : n - 1), w(0 : *), xi - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. 4 * nc) then - nc = n / 4 - call makect(nc, ip, w(nw)) - end if - if (isgn .ge. 0) then - if (n .gt. 4) then - call cftfsub(n, a, ip, nw, w) - call rftfsub(n, a, nc, w(nw)) - else if (n .eq. 4) then - call cftfsub(n, a, ip, nw, w) - end if - xi = a(0) - a(1) - a(0) = a(0) + a(1) - a(1) = xi - else - a(1) = 0.5d0 * (a(0) - a(1)) - a(0) = a(0) - a(1) - if (n .gt. 4) then - call rftbsub(n, a, nc, w(nw)) - call cftbsub(n, a, ip, nw, w) - else if (n .eq. 4) then - call cftbsub(n, a, ip, nw, w) - end if - end if - end -! - subroutine ddct(n, isgn, a, ip, w) - integer n, isgn, ip(0 : *), j, nw, nc - real*8 a(0 : n - 1), w(0 : *), xr - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - if (isgn .lt. 0) then - xr = a(n - 1) - do j = n - 2, 2, -2 - a(j + 1) = a(j) - a(j - 1) - a(j) = a(j) + a(j - 1) - end do - a(1) = a(0) - xr - a(0) = a(0) + xr - if (n .gt. 4) then - call rftbsub(n, a, nc, w(nw)) - call cftbsub(n, a, ip, nw, w) - else if (n .eq. 4) then - call cftbsub(n, a, ip, nw, w) - end if - end if - call dctsub(n, a, nc, w(nw)) - if (isgn .ge. 0) then - if (n .gt. 4) then - call cftfsub(n, a, ip, nw, w) - call rftfsub(n, a, nc, w(nw)) - else if (n .eq. 4) then - call cftfsub(n, a, ip, nw, w) - end if - xr = a(0) - a(1) - a(0) = a(0) + a(1) - do j = 2, n - 2, 2 - a(j - 1) = a(j) - a(j + 1) - a(j) = a(j) + a(j + 1) - end do - a(n - 1) = xr - end if - end -! - subroutine ddst(n, isgn, a, ip, w) - integer n, isgn, ip(0 : *), j, nw, nc - real*8 a(0 : n - 1), w(0 : *), xr - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - if (isgn .lt. 0) then - xr = a(n - 1) - do j = n - 2, 2, -2 - a(j + 1) = -a(j) - a(j - 1) - a(j) = a(j) - a(j - 1) - end do - a(1) = a(0) + xr - a(0) = a(0) - xr - if (n .gt. 4) then - call rftbsub(n, a, nc, w(nw)) - call cftbsub(n, a, ip, nw, w) - else if (n .eq. 4) then - call cftbsub(n, a, ip, nw, w) - end if - end if - call dstsub(n, a, nc, w(nw)) - if (isgn .ge. 0) then - if (n .gt. 4) then - call cftfsub(n, a, ip, nw, w) - call rftfsub(n, a, nc, w(nw)) - else if (n .eq. 4) then - call cftfsub(n, a, ip, nw, w) - end if - xr = a(0) - a(1) - a(0) = a(0) + a(1) - do j = 2, n - 2, 2 - a(j - 1) = -a(j) - a(j + 1) - a(j) = a(j) - a(j + 1) - end do - a(n - 1) = -xr - end if - end -! - subroutine dfct(n, a, t, ip, w) - integer n, ip(0 : *), j, k, l, m, mh, nw, nc - real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi - nw = ip(0) - if (n .gt. 8 * nw) then - nw = n / 8 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. 2 * nc) then - nc = n / 2 - call makect(nc, ip, w(nw)) - end if - m = n / 2 - yi = a(m) - xi = a(0) + a(n) - a(0) = a(0) - a(n) - t(0) = xi - yi - t(m) = xi + yi - if (n .gt. 2) then - mh = m / 2 - do j = 1, mh - 1 - k = m - j - xr = a(j) - a(n - j) - xi = a(j) + a(n - j) - yr = a(k) - a(n - k) - yi = a(k) + a(n - k) - a(j) = xr - a(k) = yr - t(j) = xi - yi - t(k) = xi + yi - end do - t(mh) = a(mh) + a(n - mh) - a(mh) = a(mh) - a(n - mh) - call dctsub(m, a, nc, w(nw)) - if (m .gt. 4) then - call cftfsub(m, a, ip, nw, w) - call rftfsub(m, a, nc, w(nw)) - else if (m .eq. 4) then - call cftfsub(m, a, ip, nw, w) - end if - a(n - 1) = a(0) - a(1) - a(1) = a(0) + a(1) - do j = m - 2, 2, -2 - a(2 * j + 1) = a(j) + a(j + 1) - a(2 * j - 1) = a(j) - a(j + 1) - end do - l = 2 - m = mh - do while (m .ge. 2) - call dctsub(m, t, nc, w(nw)) - if (m .gt. 4) then - call cftfsub(m, t, ip, nw, w) - call rftfsub(m, t, nc, w(nw)) - else if (m .eq. 4) then - call cftfsub(m, t, ip, nw, w) - end if - a(n - l) = t(0) - t(1) - a(l) = t(0) + t(1) - k = 0 - do j = 2, m - 2, 2 - k = k + 4 * l - a(k - l) = t(j) - t(j + 1) - a(k + l) = t(j) + t(j + 1) - end do - l = 2 * l - mh = m / 2 - do j = 0, mh - 1 - k = m - j - t(j) = t(m + k) - t(m + j) - t(k) = t(m + k) + t(m + j) - end do - t(mh) = t(m + mh) - m = mh - end do - a(l) = t(0) - a(n) = t(2) - t(1) - a(0) = t(2) + t(1) - else - a(1) = a(0) - a(2) = t(0) - a(0) = t(1) - end if - end -! - subroutine dfst(n, a, t, ip, w) - integer n, ip(0 : *), j, k, l, m, mh, nw, nc - real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi - nw = ip(0) - if (n .gt. 8 * nw) then - nw = n / 8 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. 2 * nc) then - nc = n / 2 - call makect(nc, ip, w(nw)) - end if - if (n .gt. 2) then - m = n / 2 - mh = m / 2 - do j = 1, mh - 1 - k = m - j - xr = a(j) + a(n - j) - xi = a(j) - a(n - j) - yr = a(k) + a(n - k) - yi = a(k) - a(n - k) - a(j) = xr - a(k) = yr - t(j) = xi + yi - t(k) = xi - yi - end do - t(0) = a(mh) - a(n - mh) - a(mh) = a(mh) + a(n - mh) - a(0) = a(m) - call dstsub(m, a, nc, w(nw)) - if (m .gt. 4) then - call cftfsub(m, a, ip, nw, w) - call rftfsub(m, a, nc, w(nw)) - else if (m .eq. 4) then - call cftfsub(m, a, ip, nw, w) - end if - a(n - 1) = a(1) - a(0) - a(1) = a(0) + a(1) - do j = m - 2, 2, -2 - a(2 * j + 1) = a(j) - a(j + 1) - a(2 * j - 1) = -a(j) - a(j + 1) - end do - l = 2 - m = mh - do while (m .ge. 2) - call dstsub(m, t, nc, w(nw)) - if (m .gt. 4) then - call cftfsub(m, t, ip, nw, w) - call rftfsub(m, t, nc, w(nw)) - else if (m .eq. 4) then - call cftfsub(m, t, ip, nw, w) - end if - a(n - l) = t(1) - t(0) - a(l) = t(0) + t(1) - k = 0 - do j = 2, m - 2, 2 - k = k + 4 * l - a(k - l) = -t(j) - t(j + 1) - a(k + l) = t(j) - t(j + 1) - end do - l = 2 * l - mh = m / 2 - do j = 1, mh - 1 - k = m - j - t(j) = t(m + k) + t(m + j) - t(k) = t(m + k) - t(m + j) - end do - t(0) = t(m + mh) - m = mh - end do - a(l) = t(0) - end if - a(0) = 0 - end -! -! -------- initializing routines -------- -! - subroutine makewt(nw, ip, w) - integer nw, ip(0 : *), j, nwh, nw0, nw1 - real*8 w(0 : nw - 1), delta, wn4r, wk1r, wk1i, wk3r, wk3i - ip(0) = nw - ip(1) = 1 - if (nw .gt. 2) then - nwh = nw / 2 - delta = atan(1.0d0) / nwh - wn4r = cos(delta * nwh) - w(0) = 1 - w(1) = wn4r - if (nwh .eq. 4) then - w(2) = cos(delta * 2) - w(3) = sin(delta * 2) - else if (nwh .gt. 4) then - call makeipt(nw, ip) - w(2) = 0.5d0 / cos(delta * 2) - w(3) = 0.5d0 / cos(delta * 6) - do j = 4, nwh - 4, 4 - w(j) = cos(delta * j) - w(j + 1) = sin(delta * j) - w(j + 2) = cos(3 * delta * j) - w(j + 3) = -sin(3 * delta * j) - end do - end if - nw0 = 0 - do while (nwh .gt. 2) - nw1 = nw0 + nwh - nwh = nwh / 2 - w(nw1) = 1 - w(nw1 + 1) = wn4r - if (nwh .eq. 4) then - wk1r = w(nw0 + 4) - wk1i = w(nw0 + 5) - w(nw1 + 2) = wk1r - w(nw1 + 3) = wk1i - else if (nwh .gt. 4) then - wk1r = w(nw0 + 4) - wk3r = w(nw0 + 6) - w(nw1 + 2) = 0.5d0 / wk1r - w(nw1 + 3) = 0.5d0 / wk3r - do j = 4, nwh - 4, 4 - wk1r = w(nw0 + 2 * j) - wk1i = w(nw0 + 2 * j + 1) - wk3r = w(nw0 + 2 * j + 2) - wk3i = w(nw0 + 2 * j + 3) - w(nw1 + j) = wk1r - w(nw1 + j + 1) = wk1i - w(nw1 + j + 2) = wk3r - w(nw1 + j + 3) = wk3i - end do - end if - nw0 = nw1 - end do - end if - end -! - subroutine makeipt(nw, ip) - integer nw, ip(0 : *), j, l, m, m2, p, q - ip(2) = 0 - ip(3) = 16 - m = 2 - l = nw - do while (l .gt. 32) - m2 = 2 * m - q = 8 * m2 - do j = m, m2 - 1 - p = 4 * ip(j) - ip(m + j) = p - ip(m2 + j) = p + q - end do - m = m2 - l = l / 4 - end do - end -! - subroutine makect(nc, ip, c) - integer nc, ip(0 : *), j, nch - real*8 c(0 : nc - 1), delta - ip(1) = nc - if (nc .gt. 1) then - nch = nc / 2 - delta = atan(1.0d0) / nch - c(0) = cos(delta * nch) - c(nch) = 0.5d0 * c(0) - do j = 1, nch - 1 - c(j) = 0.5d0 * cos(delta * j) - c(nc - j) = 0.5d0 * sin(delta * j) - end do - end if - end -! -! -------- child routines -------- -! - subroutine cftfsub(n, a, ip, nw, w) - integer n, ip(0 : *), nw - real*8 a(0 : n - 1), w(0 : nw - 1) - if (n .gt. 8) then - if (n .gt. 32) then - call cftf1st(n, a, w(nw - n / 4)) - if (n .gt. 512) then - call cftrec4(n, a, nw, w) - else if (n .gt. 128) then - call cftleaf(n, 1, a, nw, w) - else - call cftfx41(n, a, nw, w) - end if - call bitrv2(n, ip, a) - else if (n .eq. 32) then - call cftf161(a, w(nw - 8)) - call bitrv216(a) - else - call cftf081(a, w) - call bitrv208(a) - end if - else if (n .eq. 8) then - call cftf040(a) - else if (n .eq. 4) then - call cftx020(a) - end if - end -! - subroutine cftbsub(n, a, ip, nw, w) - integer n, ip(0 : *), nw - real*8 a(0 : n - 1), w(0 : nw - 1) - if (n .gt. 8) then - if (n .gt. 32) then - call cftb1st(n, a, w(nw - n / 4)) - if (n .gt. 512) then - call cftrec4(n, a, nw, w) - else if (n .gt. 128) then - call cftleaf(n, 1, a, nw, w) - else - call cftfx41(n, a, nw, w) - end if - call bitrv2conj(n, ip, a) - else if (n .eq. 32) then - call cftf161(a, w(nw - 8)) - call bitrv216neg(a) - else - call cftf081(a, w) - call bitrv208neg(a) - end if - else if (n .eq. 8) then - call cftb040(a) - else if (n .eq. 4) then - call cftx020(a) - end if - end -! - subroutine bitrv2(n, ip, a) - integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm - real*8 a(0 : n - 1), xr, xi, yr, yi - m = 1 - l = n / 4 - do while (l .gt. 8) - m = m * 2 - l = l / 4 - end do - nh = n / 2 - nm = 4 * m - if (l .eq. 8) then - do k = 0, m - 1 - do j = 0, k - 1 - j1 = 4 * j + 2 * ip(m + k) - k1 = 4 * k + 2 * ip(m + j) - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh - k1 = k1 + 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 + nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh - k1 = k1 - 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 + nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - k1 = 4 * k + 2 * ip(m + k) - j1 = k1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - 2 - k1 = k1 - nh - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh + 2 - k1 = k1 + nh + 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh + nm - k1 = k1 + 2 * nm - 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - else - do k = 0, m - 1 - do j = 0, k - 1 - j1 = 4 * j + ip(m + k) - k1 = 4 * k + ip(m + j) - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh - k1 = k1 + 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh - k1 = k1 - 2 - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - k1 = 4 * k + ip(m + k) - j1 = k1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + nm - xr = a(j1) - xi = a(j1 + 1) - yr = a(k1) - yi = a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - end if - end -! - subroutine bitrv2conj(n, ip, a) - integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm - real*8 a(0 : n - 1), xr, xi, yr, yi - m = 1 - l = n / 4 - do while (l .gt. 8) - m = m * 2 - l = l / 4 - end do - nh = n / 2 - nm = 4 * m - if (l .eq. 8) then - do k = 0, m - 1 - do j = 0, k - 1 - j1 = 4 * j + 2 * ip(m + k) - k1 = 4 * k + 2 * ip(m + j) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh - k1 = k1 + 2 - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 + nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh - k1 = k1 - 2 - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 + nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - k1 = 4 * k + 2 * ip(m + k) - j1 = k1 + 2 - k1 = k1 + nh - a(j1 - 1) = -a(j1 - 1) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - a(k1 + 3) = -a(k1 + 3) - j1 = j1 + nm - k1 = k1 + 2 * nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 - nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - 2 - k1 = k1 - nh - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh + 2 - k1 = k1 + nh + 2 - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh + nm - k1 = k1 + 2 * nm - 2 - a(j1 - 1) = -a(j1 - 1) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - a(k1 + 3) = -a(k1 + 3) - end do - else - do k = 0, m - 1 - do j = 0, k - 1 - j1 = 4 * j + ip(m + k) - k1 = 4 * k + ip(m + j) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nh - k1 = k1 + 2 - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + 2 - k1 = k1 + nh - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 + nm - k1 = k1 + nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nh - k1 = k1 - 2 - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - j1 = j1 - nm - k1 = k1 - nm - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - end do - k1 = 4 * k + ip(m + k) - j1 = k1 + 2 - k1 = k1 + nh - a(j1 - 1) = -a(j1 - 1) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - a(k1 + 3) = -a(k1 + 3) - j1 = j1 + nm - k1 = k1 + nm - a(j1 - 1) = -a(j1 - 1) - xr = a(j1) - xi = -a(j1 + 1) - yr = a(k1) - yi = -a(k1 + 1) - a(j1) = yr - a(j1 + 1) = yi - a(k1) = xr - a(k1 + 1) = xi - a(k1 + 3) = -a(k1 + 3) - end do - end if - end -! - subroutine bitrv216(a) - real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i - real*8 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i - real*8 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i - x1r = a(2) - x1i = a(3) - x2r = a(4) - x2i = a(5) - x3r = a(6) - x3i = a(7) - x4r = a(8) - x4i = a(9) - x5r = a(10) - x5i = a(11) - x7r = a(14) - x7i = a(15) - x8r = a(16) - x8i = a(17) - x10r = a(20) - x10i = a(21) - x11r = a(22) - x11i = a(23) - x12r = a(24) - x12i = a(25) - x13r = a(26) - x13i = a(27) - x14r = a(28) - x14i = a(29) - a(2) = x8r - a(3) = x8i - a(4) = x4r - a(5) = x4i - a(6) = x12r - a(7) = x12i - a(8) = x2r - a(9) = x2i - a(10) = x10r - a(11) = x10i - a(14) = x14r - a(15) = x14i - a(16) = x1r - a(17) = x1i - a(20) = x5r - a(21) = x5i - a(22) = x13r - a(23) = x13i - a(24) = x3r - a(25) = x3i - a(26) = x11r - a(27) = x11i - a(28) = x7r - a(29) = x7i - end -! - subroutine bitrv216neg(a) - real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i - real*8 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i - real*8 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i - real*8 x13r, x13i, x14r, x14i, x15r, x15i - x1r = a(2) - x1i = a(3) - x2r = a(4) - x2i = a(5) - x3r = a(6) - x3i = a(7) - x4r = a(8) - x4i = a(9) - x5r = a(10) - x5i = a(11) - x6r = a(12) - x6i = a(13) - x7r = a(14) - x7i = a(15) - x8r = a(16) - x8i = a(17) - x9r = a(18) - x9i = a(19) - x10r = a(20) - x10i = a(21) - x11r = a(22) - x11i = a(23) - x12r = a(24) - x12i = a(25) - x13r = a(26) - x13i = a(27) - x14r = a(28) - x14i = a(29) - x15r = a(30) - x15i = a(31) - a(2) = x15r - a(3) = x15i - a(4) = x7r - a(5) = x7i - a(6) = x11r - a(7) = x11i - a(8) = x3r - a(9) = x3i - a(10) = x13r - a(11) = x13i - a(12) = x5r - a(13) = x5i - a(14) = x9r - a(15) = x9i - a(16) = x1r - a(17) = x1i - a(18) = x14r - a(19) = x14i - a(20) = x6r - a(21) = x6i - a(22) = x10r - a(23) = x10i - a(24) = x2r - a(25) = x2i - a(26) = x12r - a(27) = x12i - a(28) = x4r - a(29) = x4i - a(30) = x8r - a(31) = x8i - end -! - subroutine bitrv208(a) - real*8 a(0 : 15), x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i - x1r = a(2) - x1i = a(3) - x3r = a(6) - x3i = a(7) - x4r = a(8) - x4i = a(9) - x6r = a(12) - x6i = a(13) - a(2) = x4r - a(3) = x4i - a(6) = x6r - a(7) = x6i - a(8) = x1r - a(9) = x1i - a(12) = x3r - a(13) = x3i - end -! - subroutine bitrv208neg(a) - real*8 a(0 : 15), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i - real*8 x5r, x5i, x6r, x6i, x7r, x7i - x1r = a(2) - x1i = a(3) - x2r = a(4) - x2i = a(5) - x3r = a(6) - x3i = a(7) - x4r = a(8) - x4i = a(9) - x5r = a(10) - x5i = a(11) - x6r = a(12) - x6i = a(13) - x7r = a(14) - x7i = a(15) - a(2) = x7r - a(3) = x7i - a(4) = x3r - a(5) = x3i - a(6) = x5r - a(7) = x5i - a(8) = x1r - a(9) = x1i - a(10) = x6r - a(11) = x6i - a(12) = x2r - a(13) = x2i - a(14) = x4r - a(15) = x4i - end -! - subroutine cftf1st(n, a, w) - integer n, j, j0, j1, j2, j3, k, m, mh - real*8 a(0 : n - 1), w(0 : *) - real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i - real*8 wd1r, wd1i, wd3r, wd3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - mh = n / 8 - m = 2 * mh - j1 = m - j2 = j1 + m - j3 = j2 + m - x0r = a(0) + a(j2) - x0i = a(1) + a(j2 + 1) - x1r = a(0) - a(j2) - x1i = a(1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(0) = x0r + x2r - a(1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - a(j2) = x1r - x3i - a(j2 + 1) = x1i + x3r - a(j3) = x1r + x3i - a(j3 + 1) = x1i - x3r - wn4r = w(1) - csc1 = w(2) - csc3 = w(3) - wd1r = 1 - wd1i = 0 - wd3r = 1 - wd3i = 0 - k = 0 - do j = 2, mh - 6, 4 - k = k + 4 - wk1r = csc1 * (wd1r + w(k)) - wk1i = csc1 * (wd1i + w(k + 1)) - wk3r = csc3 * (wd3r + w(k + 2)) - wk3i = csc3 * (wd3i + w(k + 3)) - wd1r = w(k) - wd1i = w(k + 1) - wd3r = w(k + 2) - wd3i = w(k + 3) - j1 = j + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j) + a(j2) - x0i = a(j + 1) + a(j2 + 1) - x1r = a(j) - a(j2) - x1i = a(j + 1) - a(j2 + 1) - y0r = a(j + 2) + a(j2 + 2) - y0i = a(j + 3) + a(j2 + 3) - y1r = a(j + 2) - a(j2 + 2) - y1i = a(j + 3) - a(j2 + 3) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - y2r = a(j1 + 2) + a(j3 + 2) - y2i = a(j1 + 3) + a(j3 + 3) - y3r = a(j1 + 2) - a(j3 + 2) - y3i = a(j1 + 3) - a(j3 + 3) - a(j) = x0r + x2r - a(j + 1) = x0i + x2i - a(j + 2) = y0r + y2r - a(j + 3) = y0i + y2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - a(j1 + 2) = y0r - y2r - a(j1 + 3) = y0i - y2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wk1r * x0r - wk1i * x0i - a(j2 + 1) = wk1r * x0i + wk1i * x0r - x0r = y1r - y3i - x0i = y1i + y3r - a(j2 + 2) = wd1r * x0r - wd1i * x0i - a(j2 + 3) = wd1r * x0i + wd1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = wk3r * x0r + wk3i * x0i - a(j3 + 1) = wk3r * x0i - wk3i * x0r - x0r = y1r + y3i - x0i = y1i - y3r - a(j3 + 2) = wd3r * x0r + wd3i * x0i - a(j3 + 3) = wd3r * x0i - wd3i * x0r - j0 = m - j - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) + a(j2) - x0i = a(j0 + 1) + a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = a(j0 + 1) - a(j2 + 1) - y0r = a(j0 - 2) + a(j2 - 2) - y0i = a(j0 - 1) + a(j2 - 1) - y1r = a(j0 - 2) - a(j2 - 2) - y1i = a(j0 - 1) - a(j2 - 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - y2r = a(j1 - 2) + a(j3 - 2) - y2i = a(j1 - 1) + a(j3 - 1) - y3r = a(j1 - 2) - a(j3 - 2) - y3i = a(j1 - 1) - a(j3 - 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i + x2i - a(j0 - 2) = y0r + y2r - a(j0 - 1) = y0i + y2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - a(j1 - 2) = y0r - y2r - a(j1 - 1) = y0i - y2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wk1i * x0r - wk1r * x0i - a(j2 + 1) = wk1i * x0i + wk1r * x0r - x0r = y1r - y3i - x0i = y1i + y3r - a(j2 - 2) = wd1i * x0r - wd1r * x0i - a(j2 - 1) = wd1i * x0i + wd1r * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = wk3i * x0r + wk3r * x0i - a(j3 + 1) = wk3i * x0i - wk3r * x0r - x0r = y1r + y3i - x0i = y1i - y3r - a(j3 - 2) = wd3i * x0r + wd3r * x0i - a(j3 - 1) = wd3i * x0i - wd3r * x0r - end do - wk1r = csc1 * (wd1r + wn4r) - wk1i = csc1 * (wd1i + wn4r) - wk3r = csc3 * (wd3r - wn4r) - wk3i = csc3 * (wd3i - wn4r) - j0 = mh - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0 - 2) + a(j2 - 2) - x0i = a(j0 - 1) + a(j2 - 1) - x1r = a(j0 - 2) - a(j2 - 2) - x1i = a(j0 - 1) - a(j2 - 1) - x2r = a(j1 - 2) + a(j3 - 2) - x2i = a(j1 - 1) + a(j3 - 1) - x3r = a(j1 - 2) - a(j3 - 2) - x3i = a(j1 - 1) - a(j3 - 1) - a(j0 - 2) = x0r + x2r - a(j0 - 1) = x0i + x2i - a(j1 - 2) = x0r - x2r - a(j1 - 1) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2 - 2) = wk1r * x0r - wk1i * x0i - a(j2 - 1) = wk1r * x0i + wk1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3 - 2) = wk3r * x0r + wk3i * x0i - a(j3 - 1) = wk3r * x0i - wk3i * x0r - x0r = a(j0) + a(j2) - x0i = a(j0 + 1) + a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = a(j0 + 1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wn4r * (x0r - x0i) - a(j2 + 1) = wn4r * (x0i + x0r) - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = -wn4r * (x0r + x0i) - a(j3 + 1) = -wn4r * (x0i - x0r) - x0r = a(j0 + 2) + a(j2 + 2) - x0i = a(j0 + 3) + a(j2 + 3) - x1r = a(j0 + 2) - a(j2 + 2) - x1i = a(j0 + 3) - a(j2 + 3) - x2r = a(j1 + 2) + a(j3 + 2) - x2i = a(j1 + 3) + a(j3 + 3) - x3r = a(j1 + 2) - a(j3 + 2) - x3i = a(j1 + 3) - a(j3 + 3) - a(j0 + 2) = x0r + x2r - a(j0 + 3) = x0i + x2i - a(j1 + 2) = x0r - x2r - a(j1 + 3) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2 + 2) = wk1i * x0r - wk1r * x0i - a(j2 + 3) = wk1i * x0i + wk1r * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3 + 2) = wk3i * x0r + wk3r * x0i - a(j3 + 3) = wk3i * x0i - wk3r * x0r - end -! - subroutine cftb1st(n, a, w) - integer n, j, j0, j1, j2, j3, k, m, mh - real*8 a(0 : n - 1), w(0 : *) - real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i - real*8 wd1r, wd1i, wd3r, wd3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - mh = n / 8 - m = 2 * mh - j1 = m - j2 = j1 + m - j3 = j2 + m - x0r = a(0) + a(j2) - x0i = -a(1) - a(j2 + 1) - x1r = a(0) - a(j2) - x1i = -a(1) + a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(0) = x0r + x2r - a(1) = x0i - x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i + x2i - a(j2) = x1r + x3i - a(j2 + 1) = x1i + x3r - a(j3) = x1r - x3i - a(j3 + 1) = x1i - x3r - wn4r = w(1) - csc1 = w(2) - csc3 = w(3) - wd1r = 1 - wd1i = 0 - wd3r = 1 - wd3i = 0 - k = 0 - do j = 2, mh - 6, 4 - k = k + 4 - wk1r = csc1 * (wd1r + w(k)) - wk1i = csc1 * (wd1i + w(k + 1)) - wk3r = csc3 * (wd3r + w(k + 2)) - wk3i = csc3 * (wd3i + w(k + 3)) - wd1r = w(k) - wd1i = w(k + 1) - wd3r = w(k + 2) - wd3i = w(k + 3) - j1 = j + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j) + a(j2) - x0i = -a(j + 1) - a(j2 + 1) - x1r = a(j) - a(j2) - x1i = -a(j + 1) + a(j2 + 1) - y0r = a(j + 2) + a(j2 + 2) - y0i = -a(j + 3) - a(j2 + 3) - y1r = a(j + 2) - a(j2 + 2) - y1i = -a(j + 3) + a(j2 + 3) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - y2r = a(j1 + 2) + a(j3 + 2) - y2i = a(j1 + 3) + a(j3 + 3) - y3r = a(j1 + 2) - a(j3 + 2) - y3i = a(j1 + 3) - a(j3 + 3) - a(j) = x0r + x2r - a(j + 1) = x0i - x2i - a(j + 2) = y0r + y2r - a(j + 3) = y0i - y2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i + x2i - a(j1 + 2) = y0r - y2r - a(j1 + 3) = y0i + y2i - x0r = x1r + x3i - x0i = x1i + x3r - a(j2) = wk1r * x0r - wk1i * x0i - a(j2 + 1) = wk1r * x0i + wk1i * x0r - x0r = y1r + y3i - x0i = y1i + y3r - a(j2 + 2) = wd1r * x0r - wd1i * x0i - a(j2 + 3) = wd1r * x0i + wd1i * x0r - x0r = x1r - x3i - x0i = x1i - x3r - a(j3) = wk3r * x0r + wk3i * x0i - a(j3 + 1) = wk3r * x0i - wk3i * x0r - x0r = y1r - y3i - x0i = y1i - y3r - a(j3 + 2) = wd3r * x0r + wd3i * x0i - a(j3 + 3) = wd3r * x0i - wd3i * x0r - j0 = m - j - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) + a(j2) - x0i = -a(j0 + 1) - a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = -a(j0 + 1) + a(j2 + 1) - y0r = a(j0 - 2) + a(j2 - 2) - y0i = -a(j0 - 1) - a(j2 - 1) - y1r = a(j0 - 2) - a(j2 - 2) - y1i = -a(j0 - 1) + a(j2 - 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - y2r = a(j1 - 2) + a(j3 - 2) - y2i = a(j1 - 1) + a(j3 - 1) - y3r = a(j1 - 2) - a(j3 - 2) - y3i = a(j1 - 1) - a(j3 - 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i - x2i - a(j0 - 2) = y0r + y2r - a(j0 - 1) = y0i - y2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i + x2i - a(j1 - 2) = y0r - y2r - a(j1 - 1) = y0i + y2i - x0r = x1r + x3i - x0i = x1i + x3r - a(j2) = wk1i * x0r - wk1r * x0i - a(j2 + 1) = wk1i * x0i + wk1r * x0r - x0r = y1r + y3i - x0i = y1i + y3r - a(j2 - 2) = wd1i * x0r - wd1r * x0i - a(j2 - 1) = wd1i * x0i + wd1r * x0r - x0r = x1r - x3i - x0i = x1i - x3r - a(j3) = wk3i * x0r + wk3r * x0i - a(j3 + 1) = wk3i * x0i - wk3r * x0r - x0r = y1r - y3i - x0i = y1i - y3r - a(j3 - 2) = wd3i * x0r + wd3r * x0i - a(j3 - 1) = wd3i * x0i - wd3r * x0r - end do - wk1r = csc1 * (wd1r + wn4r) - wk1i = csc1 * (wd1i + wn4r) - wk3r = csc3 * (wd3r - wn4r) - wk3i = csc3 * (wd3i - wn4r) - j0 = mh - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0 - 2) + a(j2 - 2) - x0i = -a(j0 - 1) - a(j2 - 1) - x1r = a(j0 - 2) - a(j2 - 2) - x1i = -a(j0 - 1) + a(j2 - 1) - x2r = a(j1 - 2) + a(j3 - 2) - x2i = a(j1 - 1) + a(j3 - 1) - x3r = a(j1 - 2) - a(j3 - 2) - x3i = a(j1 - 1) - a(j3 - 1) - a(j0 - 2) = x0r + x2r - a(j0 - 1) = x0i - x2i - a(j1 - 2) = x0r - x2r - a(j1 - 1) = x0i + x2i - x0r = x1r + x3i - x0i = x1i + x3r - a(j2 - 2) = wk1r * x0r - wk1i * x0i - a(j2 - 1) = wk1r * x0i + wk1i * x0r - x0r = x1r - x3i - x0i = x1i - x3r - a(j3 - 2) = wk3r * x0r + wk3i * x0i - a(j3 - 1) = wk3r * x0i - wk3i * x0r - x0r = a(j0) + a(j2) - x0i = -a(j0 + 1) - a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = -a(j0 + 1) + a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i - x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i + x2i - x0r = x1r + x3i - x0i = x1i + x3r - a(j2) = wn4r * (x0r - x0i) - a(j2 + 1) = wn4r * (x0i + x0r) - x0r = x1r - x3i - x0i = x1i - x3r - a(j3) = -wn4r * (x0r + x0i) - a(j3 + 1) = -wn4r * (x0i - x0r) - x0r = a(j0 + 2) + a(j2 + 2) - x0i = -a(j0 + 3) - a(j2 + 3) - x1r = a(j0 + 2) - a(j2 + 2) - x1i = -a(j0 + 3) + a(j2 + 3) - x2r = a(j1 + 2) + a(j3 + 2) - x2i = a(j1 + 3) + a(j3 + 3) - x3r = a(j1 + 2) - a(j3 + 2) - x3i = a(j1 + 3) - a(j3 + 3) - a(j0 + 2) = x0r + x2r - a(j0 + 3) = x0i - x2i - a(j1 + 2) = x0r - x2r - a(j1 + 3) = x0i + x2i - x0r = x1r + x3i - x0i = x1i + x3r - a(j2 + 2) = wk1i * x0r - wk1r * x0i - a(j2 + 3) = wk1i * x0i + wk1r * x0r - x0r = x1r - x3i - x0i = x1i - x3r - a(j3 + 2) = wk3i * x0r + wk3r * x0i - a(j3 + 3) = wk3i * x0i - wk3r * x0r - end -! - subroutine cftrec4(n, a, nw, w) - integer n, nw, cfttree, isplt, j, k, m - real*8 a(0 : n - 1), w(0 : nw - 1) - m = n - do while (m .gt. 512) - m = m / 4 - call cftmdl1(m, a(n - m), w(nw - m / 2)) - end do - call cftleaf(m, 1, a(n - m), nw, w) - k = 0 - do j = n - m, m, -m - k = k + 1 - isplt = cfttree(m, j, k, a, nw, w) - call cftleaf(m, isplt, a(j - m), nw, w) - end do - end -! - integer function cfttree(n, j, k, a, nw, w) - integer n, j, k, nw, i, isplt, m - real*8 a(0 : j - 1), w(0 : nw - 1) - if (mod(k, 4) .ne. 0) then - isplt = mod(k, 2) - if (isplt .ne. 0) then - call cftmdl1(n, a(j - n), w(nw - n / 2)) - else - call cftmdl2(n, a(j - n), w(nw - n)) - end if - else - m = n - i = k - do while (mod(i, 4) .eq. 0) - m = m * 4 - i = i / 4 - end do - isplt = mod(i, 2) - if (isplt .ne. 0) then - do while (m .gt. 128) - call cftmdl1(m, a(j - m), w(nw - m / 2)) - m = m / 4 - end do - else - do while (m .gt. 128) - call cftmdl2(m, a(j - m), w(nw - m)) - m = m / 4 - end do - end if - end if - cfttree = isplt - end -! - subroutine cftleaf(n, isplt, a, nw, w) - integer n, isplt, nw - real*8 a(0 : n - 1), w(0 : nw - 1) - if (n .eq. 512) then - call cftmdl1(128, a, w(nw - 64)) - call cftf161(a, w(nw - 8)) - call cftf162(a(32), w(nw - 32)) - call cftf161(a(64), w(nw - 8)) - call cftf161(a(96), w(nw - 8)) - call cftmdl2(128, a(128), w(nw - 128)) - call cftf161(a(128), w(nw - 8)) - call cftf162(a(160), w(nw - 32)) - call cftf161(a(192), w(nw - 8)) - call cftf162(a(224), w(nw - 32)) - call cftmdl1(128, a(256), w(nw - 64)) - call cftf161(a(256), w(nw - 8)) - call cftf162(a(288), w(nw - 32)) - call cftf161(a(320), w(nw - 8)) - call cftf161(a(352), w(nw - 8)) - if (isplt .ne. 0) then - call cftmdl1(128, a(384), w(nw - 64)) - call cftf161(a(480), w(nw - 8)) - else - call cftmdl2(128, a(384), w(nw - 128)) - call cftf162(a(480), w(nw - 32)) - end if - call cftf161(a(384), w(nw - 8)) - call cftf162(a(416), w(nw - 32)) - call cftf161(a(448), w(nw - 8)) - else - call cftmdl1(64, a, w(nw - 32)) - call cftf081(a, w(nw - 8)) - call cftf082(a(16), w(nw - 8)) - call cftf081(a(32), w(nw - 8)) - call cftf081(a(48), w(nw - 8)) - call cftmdl2(64, a(64), w(nw - 64)) - call cftf081(a(64), w(nw - 8)) - call cftf082(a(80), w(nw - 8)) - call cftf081(a(96), w(nw - 8)) - call cftf082(a(112), w(nw - 8)) - call cftmdl1(64, a(128), w(nw - 32)) - call cftf081(a(128), w(nw - 8)) - call cftf082(a(144), w(nw - 8)) - call cftf081(a(160), w(nw - 8)) - call cftf081(a(176), w(nw - 8)) - if (isplt .ne. 0) then - call cftmdl1(64, a(192), w(nw - 32)) - call cftf081(a(240), w(nw - 8)) - else - call cftmdl2(64, a(192), w(nw - 64)) - call cftf082(a(240), w(nw - 8)) - end if - call cftf081(a(192), w(nw - 8)) - call cftf082(a(208), w(nw - 8)) - call cftf081(a(224), w(nw - 8)) - end if - end -! - subroutine cftmdl1(n, a, w) - integer n, j, j0, j1, j2, j3, k, m, mh - real*8 a(0 : n - 1), w(0 : *) - real*8 wn4r, wk1r, wk1i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - mh = n / 8 - m = 2 * mh - j1 = m - j2 = j1 + m - j3 = j2 + m - x0r = a(0) + a(j2) - x0i = a(1) + a(j2 + 1) - x1r = a(0) - a(j2) - x1i = a(1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(0) = x0r + x2r - a(1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - a(j2) = x1r - x3i - a(j2 + 1) = x1i + x3r - a(j3) = x1r + x3i - a(j3 + 1) = x1i - x3r - wn4r = w(1) - k = 0 - do j = 2, mh - 2, 2 - k = k + 4 - wk1r = w(k) - wk1i = w(k + 1) - wk3r = w(k + 2) - wk3i = w(k + 3) - j1 = j + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j) + a(j2) - x0i = a(j + 1) + a(j2 + 1) - x1r = a(j) - a(j2) - x1i = a(j + 1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(j) = x0r + x2r - a(j + 1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wk1r * x0r - wk1i * x0i - a(j2 + 1) = wk1r * x0i + wk1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = wk3r * x0r + wk3i * x0i - a(j3 + 1) = wk3r * x0i - wk3i * x0r - j0 = m - j - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) + a(j2) - x0i = a(j0 + 1) + a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = a(j0 + 1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wk1i * x0r - wk1r * x0i - a(j2 + 1) = wk1i * x0i + wk1r * x0r - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = wk3i * x0r + wk3r * x0i - a(j3 + 1) = wk3i * x0i - wk3r * x0r - end do - j0 = mh - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) + a(j2) - x0i = a(j0 + 1) + a(j2 + 1) - x1r = a(j0) - a(j2) - x1i = a(j0 + 1) - a(j2 + 1) - x2r = a(j1) + a(j3) - x2i = a(j1 + 1) + a(j3 + 1) - x3r = a(j1) - a(j3) - x3i = a(j1 + 1) - a(j3 + 1) - a(j0) = x0r + x2r - a(j0 + 1) = x0i + x2i - a(j1) = x0r - x2r - a(j1 + 1) = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - a(j2) = wn4r * (x0r - x0i) - a(j2 + 1) = wn4r * (x0i + x0r) - x0r = x1r + x3i - x0i = x1i - x3r - a(j3) = -wn4r * (x0r + x0i) - a(j3 + 1) = -wn4r * (x0i - x0r) - end -! - subroutine cftmdl2(n, a, w) - integer n, j, j0, j1, j2, j3, k, kr, m, mh - real*8 a(0 : n - 1), w(0 : *) - real*8 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - real*8 y0r, y0i, y2r, y2i - mh = n / 8 - m = 2 * mh - wn4r = w(1) - j1 = m - j2 = j1 + m - j3 = j2 + m - x0r = a(0) - a(j2 + 1) - x0i = a(1) + a(j2) - x1r = a(0) + a(j2 + 1) - x1i = a(1) - a(j2) - x2r = a(j1) - a(j3 + 1) - x2i = a(j1 + 1) + a(j3) - x3r = a(j1) + a(j3 + 1) - x3i = a(j1 + 1) - a(j3) - y0r = wn4r * (x2r - x2i) - y0i = wn4r * (x2i + x2r) - a(0) = x0r + y0r - a(1) = x0i + y0i - a(j1) = x0r - y0r - a(j1 + 1) = x0i - y0i - y0r = wn4r * (x3r - x3i) - y0i = wn4r * (x3i + x3r) - a(j2) = x1r - y0i - a(j2 + 1) = x1i + y0r - a(j3) = x1r + y0i - a(j3 + 1) = x1i - y0r - k = 0 - kr = 2 * m - do j = 2, mh - 2, 2 - k = k + 4 - wk1r = w(k) - wk1i = w(k + 1) - wk3r = w(k + 2) - wk3i = w(k + 3) - kr = kr - 4 - wd1i = w(kr) - wd1r = w(kr + 1) - wd3i = w(kr + 2) - wd3r = w(kr + 3) - j1 = j + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j) - a(j2 + 1) - x0i = a(j + 1) + a(j2) - x1r = a(j) + a(j2 + 1) - x1i = a(j + 1) - a(j2) - x2r = a(j1) - a(j3 + 1) - x2i = a(j1 + 1) + a(j3) - x3r = a(j1) + a(j3 + 1) - x3i = a(j1 + 1) - a(j3) - y0r = wk1r * x0r - wk1i * x0i - y0i = wk1r * x0i + wk1i * x0r - y2r = wd1r * x2r - wd1i * x2i - y2i = wd1r * x2i + wd1i * x2r - a(j) = y0r + y2r - a(j + 1) = y0i + y2i - a(j1) = y0r - y2r - a(j1 + 1) = y0i - y2i - y0r = wk3r * x1r + wk3i * x1i - y0i = wk3r * x1i - wk3i * x1r - y2r = wd3r * x3r + wd3i * x3i - y2i = wd3r * x3i - wd3i * x3r - a(j2) = y0r + y2r - a(j2 + 1) = y0i + y2i - a(j3) = y0r - y2r - a(j3 + 1) = y0i - y2i - j0 = m - j - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) - a(j2 + 1) - x0i = a(j0 + 1) + a(j2) - x1r = a(j0) + a(j2 + 1) - x1i = a(j0 + 1) - a(j2) - x2r = a(j1) - a(j3 + 1) - x2i = a(j1 + 1) + a(j3) - x3r = a(j1) + a(j3 + 1) - x3i = a(j1 + 1) - a(j3) - y0r = wd1i * x0r - wd1r * x0i - y0i = wd1i * x0i + wd1r * x0r - y2r = wk1i * x2r - wk1r * x2i - y2i = wk1i * x2i + wk1r * x2r - a(j0) = y0r + y2r - a(j0 + 1) = y0i + y2i - a(j1) = y0r - y2r - a(j1 + 1) = y0i - y2i - y0r = wd3i * x1r + wd3r * x1i - y0i = wd3i * x1i - wd3r * x1r - y2r = wk3i * x3r + wk3r * x3i - y2i = wk3i * x3i - wk3r * x3r - a(j2) = y0r + y2r - a(j2 + 1) = y0i + y2i - a(j3) = y0r - y2r - a(j3 + 1) = y0i - y2i - end do - wk1r = w(m) - wk1i = w(m + 1) - j0 = mh - j1 = j0 + m - j2 = j1 + m - j3 = j2 + m - x0r = a(j0) - a(j2 + 1) - x0i = a(j0 + 1) + a(j2) - x1r = a(j0) + a(j2 + 1) - x1i = a(j0 + 1) - a(j2) - x2r = a(j1) - a(j3 + 1) - x2i = a(j1 + 1) + a(j3) - x3r = a(j1) + a(j3 + 1) - x3i = a(j1 + 1) - a(j3) - y0r = wk1r * x0r - wk1i * x0i - y0i = wk1r * x0i + wk1i * x0r - y2r = wk1i * x2r - wk1r * x2i - y2i = wk1i * x2i + wk1r * x2r - a(j0) = y0r + y2r - a(j0 + 1) = y0i + y2i - a(j1) = y0r - y2r - a(j1 + 1) = y0i - y2i - y0r = wk1i * x1r - wk1r * x1i - y0i = wk1i * x1i + wk1r * x1r - y2r = wk1r * x3r - wk1i * x3i - y2i = wk1r * x3i + wk1i * x3r - a(j2) = y0r - y2r - a(j2 + 1) = y0i - y2i - a(j3) = y0r + y2r - a(j3 + 1) = y0i + y2i - end -! - subroutine cftfx41(n, a, nw, w) - integer n, nw - real*8 a(0 : n - 1), w(0 : nw - 1) - if (n .eq. 128) then - call cftf161(a, w(nw - 8)) - call cftf162(a(32), w(nw - 32)) - call cftf161(a(64), w(nw - 8)) - call cftf161(a(96), w(nw - 8)) - else - call cftf081(a, w(nw - 8)) - call cftf082(a(16), w(nw - 8)) - call cftf081(a(32), w(nw - 8)) - call cftf081(a(48), w(nw - 8)) - end if - end -! - subroutine cftf161(a, w) - real*8 a(0 : 31), w(0 : *), wn4r, wk1r, wk1i - real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i - real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i - real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i - wn4r = w(1) - wk1r = w(2) - wk1i = w(3) - x0r = a(0) + a(16) - x0i = a(1) + a(17) - x1r = a(0) - a(16) - x1i = a(1) - a(17) - x2r = a(8) + a(24) - x2i = a(9) + a(25) - x3r = a(8) - a(24) - x3i = a(9) - a(25) - y0r = x0r + x2r - y0i = x0i + x2i - y4r = x0r - x2r - y4i = x0i - x2i - y8r = x1r - x3i - y8i = x1i + x3r - y12r = x1r + x3i - y12i = x1i - x3r - x0r = a(2) + a(18) - x0i = a(3) + a(19) - x1r = a(2) - a(18) - x1i = a(3) - a(19) - x2r = a(10) + a(26) - x2i = a(11) + a(27) - x3r = a(10) - a(26) - x3i = a(11) - a(27) - y1r = x0r + x2r - y1i = x0i + x2i - y5r = x0r - x2r - y5i = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - y9r = wk1r * x0r - wk1i * x0i - y9i = wk1r * x0i + wk1i * x0r - x0r = x1r + x3i - x0i = x1i - x3r - y13r = wk1i * x0r - wk1r * x0i - y13i = wk1i * x0i + wk1r * x0r - x0r = a(4) + a(20) - x0i = a(5) + a(21) - x1r = a(4) - a(20) - x1i = a(5) - a(21) - x2r = a(12) + a(28) - x2i = a(13) + a(29) - x3r = a(12) - a(28) - x3i = a(13) - a(29) - y2r = x0r + x2r - y2i = x0i + x2i - y6r = x0r - x2r - y6i = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - y10r = wn4r * (x0r - x0i) - y10i = wn4r * (x0i + x0r) - x0r = x1r + x3i - x0i = x1i - x3r - y14r = wn4r * (x0r + x0i) - y14i = wn4r * (x0i - x0r) - x0r = a(6) + a(22) - x0i = a(7) + a(23) - x1r = a(6) - a(22) - x1i = a(7) - a(23) - x2r = a(14) + a(30) - x2i = a(15) + a(31) - x3r = a(14) - a(30) - x3i = a(15) - a(31) - y3r = x0r + x2r - y3i = x0i + x2i - y7r = x0r - x2r - y7i = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - y11r = wk1i * x0r - wk1r * x0i - y11i = wk1i * x0i + wk1r * x0r - x0r = x1r + x3i - x0i = x1i - x3r - y15r = wk1r * x0r - wk1i * x0i - y15i = wk1r * x0i + wk1i * x0r - x0r = y12r - y14r - x0i = y12i - y14i - x1r = y12r + y14r - x1i = y12i + y14i - x2r = y13r - y15r - x2i = y13i - y15i - x3r = y13r + y15r - x3i = y13i + y15i - a(24) = x0r + x2r - a(25) = x0i + x2i - a(26) = x0r - x2r - a(27) = x0i - x2i - a(28) = x1r - x3i - a(29) = x1i + x3r - a(30) = x1r + x3i - a(31) = x1i - x3r - x0r = y8r + y10r - x0i = y8i + y10i - x1r = y8r - y10r - x1i = y8i - y10i - x2r = y9r + y11r - x2i = y9i + y11i - x3r = y9r - y11r - x3i = y9i - y11i - a(16) = x0r + x2r - a(17) = x0i + x2i - a(18) = x0r - x2r - a(19) = x0i - x2i - a(20) = x1r - x3i - a(21) = x1i + x3r - a(22) = x1r + x3i - a(23) = x1i - x3r - x0r = y5r - y7i - x0i = y5i + y7r - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - x0r = y5r + y7i - x0i = y5i - y7r - x3r = wn4r * (x0r - x0i) - x3i = wn4r * (x0i + x0r) - x0r = y4r - y6i - x0i = y4i + y6r - x1r = y4r + y6i - x1i = y4i - y6r - a(8) = x0r + x2r - a(9) = x0i + x2i - a(10) = x0r - x2r - a(11) = x0i - x2i - a(12) = x1r - x3i - a(13) = x1i + x3r - a(14) = x1r + x3i - a(15) = x1i - x3r - x0r = y0r + y2r - x0i = y0i + y2i - x1r = y0r - y2r - x1i = y0i - y2i - x2r = y1r + y3r - x2i = y1i + y3i - x3r = y1r - y3r - x3i = y1i - y3i - a(0) = x0r + x2r - a(1) = x0i + x2i - a(2) = x0r - x2r - a(3) = x0i - x2i - a(4) = x1r - x3i - a(5) = x1i + x3r - a(6) = x1r + x3i - a(7) = x1i - x3r - end -! - subroutine cftf162(a, w) - real*8 a(0 : 31), w(0 : *) - real*8 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i - real*8 x0r, x0i, x1r, x1i, x2r, x2i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i - real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i - real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i - wn4r = w(1) - wk1r = w(4) - wk1i = w(5) - wk3r = w(6) - wk3i = -w(7) - wk2r = w(8) - wk2i = w(9) - x1r = a(0) - a(17) - x1i = a(1) + a(16) - x0r = a(8) - a(25) - x0i = a(9) + a(24) - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - y0r = x1r + x2r - y0i = x1i + x2i - y4r = x1r - x2r - y4i = x1i - x2i - x1r = a(0) + a(17) - x1i = a(1) - a(16) - x0r = a(8) + a(25) - x0i = a(9) - a(24) - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - y8r = x1r - x2i - y8i = x1i + x2r - y12r = x1r + x2i - y12i = x1i - x2r - x0r = a(2) - a(19) - x0i = a(3) + a(18) - x1r = wk1r * x0r - wk1i * x0i - x1i = wk1r * x0i + wk1i * x0r - x0r = a(10) - a(27) - x0i = a(11) + a(26) - x2r = wk3i * x0r - wk3r * x0i - x2i = wk3i * x0i + wk3r * x0r - y1r = x1r + x2r - y1i = x1i + x2i - y5r = x1r - x2r - y5i = x1i - x2i - x0r = a(2) + a(19) - x0i = a(3) - a(18) - x1r = wk3r * x0r - wk3i * x0i - x1i = wk3r * x0i + wk3i * x0r - x0r = a(10) + a(27) - x0i = a(11) - a(26) - x2r = wk1r * x0r + wk1i * x0i - x2i = wk1r * x0i - wk1i * x0r - y9r = x1r - x2r - y9i = x1i - x2i - y13r = x1r + x2r - y13i = x1i + x2i - x0r = a(4) - a(21) - x0i = a(5) + a(20) - x1r = wk2r * x0r - wk2i * x0i - x1i = wk2r * x0i + wk2i * x0r - x0r = a(12) - a(29) - x0i = a(13) + a(28) - x2r = wk2i * x0r - wk2r * x0i - x2i = wk2i * x0i + wk2r * x0r - y2r = x1r + x2r - y2i = x1i + x2i - y6r = x1r - x2r - y6i = x1i - x2i - x0r = a(4) + a(21) - x0i = a(5) - a(20) - x1r = wk2i * x0r - wk2r * x0i - x1i = wk2i * x0i + wk2r * x0r - x0r = a(12) + a(29) - x0i = a(13) - a(28) - x2r = wk2r * x0r - wk2i * x0i - x2i = wk2r * x0i + wk2i * x0r - y10r = x1r - x2r - y10i = x1i - x2i - y14r = x1r + x2r - y14i = x1i + x2i - x0r = a(6) - a(23) - x0i = a(7) + a(22) - x1r = wk3r * x0r - wk3i * x0i - x1i = wk3r * x0i + wk3i * x0r - x0r = a(14) - a(31) - x0i = a(15) + a(30) - x2r = wk1i * x0r - wk1r * x0i - x2i = wk1i * x0i + wk1r * x0r - y3r = x1r + x2r - y3i = x1i + x2i - y7r = x1r - x2r - y7i = x1i - x2i - x0r = a(6) + a(23) - x0i = a(7) - a(22) - x1r = wk1i * x0r + wk1r * x0i - x1i = wk1i * x0i - wk1r * x0r - x0r = a(14) + a(31) - x0i = a(15) - a(30) - x2r = wk3i * x0r - wk3r * x0i - x2i = wk3i * x0i + wk3r * x0r - y11r = x1r + x2r - y11i = x1i + x2i - y15r = x1r - x2r - y15i = x1i - x2i - x1r = y0r + y2r - x1i = y0i + y2i - x2r = y1r + y3r - x2i = y1i + y3i - a(0) = x1r + x2r - a(1) = x1i + x2i - a(2) = x1r - x2r - a(3) = x1i - x2i - x1r = y0r - y2r - x1i = y0i - y2i - x2r = y1r - y3r - x2i = y1i - y3i - a(4) = x1r - x2i - a(5) = x1i + x2r - a(6) = x1r + x2i - a(7) = x1i - x2r - x1r = y4r - y6i - x1i = y4i + y6r - x0r = y5r - y7i - x0i = y5i + y7r - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - a(8) = x1r + x2r - a(9) = x1i + x2i - a(10) = x1r - x2r - a(11) = x1i - x2i - x1r = y4r + y6i - x1i = y4i - y6r - x0r = y5r + y7i - x0i = y5i - y7r - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - a(12) = x1r - x2i - a(13) = x1i + x2r - a(14) = x1r + x2i - a(15) = x1i - x2r - x1r = y8r + y10r - x1i = y8i + y10i - x2r = y9r - y11r - x2i = y9i - y11i - a(16) = x1r + x2r - a(17) = x1i + x2i - a(18) = x1r - x2r - a(19) = x1i - x2i - x1r = y8r - y10r - x1i = y8i - y10i - x2r = y9r + y11r - x2i = y9i + y11i - a(20) = x1r - x2i - a(21) = x1i + x2r - a(22) = x1r + x2i - a(23) = x1i - x2r - x1r = y12r - y14i - x1i = y12i + y14r - x0r = y13r + y15i - x0i = y13i - y15r - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - a(24) = x1r + x2r - a(25) = x1i + x2i - a(26) = x1r - x2r - a(27) = x1i - x2i - x1r = y12r + y14i - x1i = y12i - y14r - x0r = y13r - y15i - x0i = y13i + y15r - x2r = wn4r * (x0r - x0i) - x2i = wn4r * (x0i + x0r) - a(28) = x1r - x2i - a(29) = x1i + x2r - a(30) = x1r + x2i - a(31) = x1i - x2r - end -! - subroutine cftf081(a, w) - real*8 a(0 : 15), w(0 : *) - real*8 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i - wn4r = w(1) - x0r = a(0) + a(8) - x0i = a(1) + a(9) - x1r = a(0) - a(8) - x1i = a(1) - a(9) - x2r = a(4) + a(12) - x2i = a(5) + a(13) - x3r = a(4) - a(12) - x3i = a(5) - a(13) - y0r = x0r + x2r - y0i = x0i + x2i - y2r = x0r - x2r - y2i = x0i - x2i - y1r = x1r - x3i - y1i = x1i + x3r - y3r = x1r + x3i - y3i = x1i - x3r - x0r = a(2) + a(10) - x0i = a(3) + a(11) - x1r = a(2) - a(10) - x1i = a(3) - a(11) - x2r = a(6) + a(14) - x2i = a(7) + a(15) - x3r = a(6) - a(14) - x3i = a(7) - a(15) - y4r = x0r + x2r - y4i = x0i + x2i - y6r = x0r - x2r - y6i = x0i - x2i - x0r = x1r - x3i - x0i = x1i + x3r - x2r = x1r + x3i - x2i = x1i - x3r - y5r = wn4r * (x0r - x0i) - y5i = wn4r * (x0r + x0i) - y7r = wn4r * (x2r - x2i) - y7i = wn4r * (x2r + x2i) - a(8) = y1r + y5r - a(9) = y1i + y5i - a(10) = y1r - y5r - a(11) = y1i - y5i - a(12) = y3r - y7i - a(13) = y3i + y7r - a(14) = y3r + y7i - a(15) = y3i - y7r - a(0) = y0r + y4r - a(1) = y0i + y4i - a(2) = y0r - y4r - a(3) = y0i - y4i - a(4) = y2r - y6i - a(5) = y2i + y6r - a(6) = y2r + y6i - a(7) = y2i - y6r - end -! - subroutine cftf082(a, w) - real*8 a(0 : 15), w(0 : *) - real*8 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i - real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i - real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i - wn4r = w(1) - wk1r = w(2) - wk1i = w(3) - y0r = a(0) - a(9) - y0i = a(1) + a(8) - y1r = a(0) + a(9) - y1i = a(1) - a(8) - x0r = a(4) - a(13) - x0i = a(5) + a(12) - y2r = wn4r * (x0r - x0i) - y2i = wn4r * (x0i + x0r) - x0r = a(4) + a(13) - x0i = a(5) - a(12) - y3r = wn4r * (x0r - x0i) - y3i = wn4r * (x0i + x0r) - x0r = a(2) - a(11) - x0i = a(3) + a(10) - y4r = wk1r * x0r - wk1i * x0i - y4i = wk1r * x0i + wk1i * x0r - x0r = a(2) + a(11) - x0i = a(3) - a(10) - y5r = wk1i * x0r - wk1r * x0i - y5i = wk1i * x0i + wk1r * x0r - x0r = a(6) - a(15) - x0i = a(7) + a(14) - y6r = wk1i * x0r - wk1r * x0i - y6i = wk1i * x0i + wk1r * x0r - x0r = a(6) + a(15) - x0i = a(7) - a(14) - y7r = wk1r * x0r - wk1i * x0i - y7i = wk1r * x0i + wk1i * x0r - x0r = y0r + y2r - x0i = y0i + y2i - x1r = y4r + y6r - x1i = y4i + y6i - a(0) = x0r + x1r - a(1) = x0i + x1i - a(2) = x0r - x1r - a(3) = x0i - x1i - x0r = y0r - y2r - x0i = y0i - y2i - x1r = y4r - y6r - x1i = y4i - y6i - a(4) = x0r - x1i - a(5) = x0i + x1r - a(6) = x0r + x1i - a(7) = x0i - x1r - x0r = y1r - y3i - x0i = y1i + y3r - x1r = y5r - y7r - x1i = y5i - y7i - a(8) = x0r + x1r - a(9) = x0i + x1i - a(10) = x0r - x1r - a(11) = x0i - x1i - x0r = y1r + y3i - x0i = y1i - y3r - x1r = y5r + y7r - x1i = y5i + y7i - a(12) = x0r - x1i - a(13) = x0i + x1r - a(14) = x0r + x1i - a(15) = x0i - x1r - end -! - subroutine cftf040(a) - real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - x0r = a(0) + a(4) - x0i = a(1) + a(5) - x1r = a(0) - a(4) - x1i = a(1) - a(5) - x2r = a(2) + a(6) - x2i = a(3) + a(7) - x3r = a(2) - a(6) - x3i = a(3) - a(7) - a(0) = x0r + x2r - a(1) = x0i + x2i - a(2) = x1r - x3i - a(3) = x1i + x3r - a(4) = x0r - x2r - a(5) = x0i - x2i - a(6) = x1r + x3i - a(7) = x1i - x3r - end -! - subroutine cftb040(a) - real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i - x0r = a(0) + a(4) - x0i = a(1) + a(5) - x1r = a(0) - a(4) - x1i = a(1) - a(5) - x2r = a(2) + a(6) - x2i = a(3) + a(7) - x3r = a(2) - a(6) - x3i = a(3) - a(7) - a(0) = x0r + x2r - a(1) = x0i + x2i - a(2) = x1r + x3i - a(3) = x1i - x3r - a(4) = x0r - x2r - a(5) = x0i - x2i - a(6) = x1r - x3i - a(7) = x1i + x3r - end -! - subroutine cftx020(a) - real*8 a(0 : 3), x0r, x0i - x0r = a(0) - a(2) - x0i = a(1) - a(3) - a(0) = a(0) + a(2) - a(1) = a(1) + a(3) - a(2) = x0r - a(3) = x0i - end -! - subroutine rftfsub(n, a, nc, c) - integer n, nc, j, k, kk, ks, m - real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi - m = n / 2 - ks = 2 * nc / m - kk = 0 - do j = 2, m - 2, 2 - k = n - j - kk = kk + ks - wkr = 0.5d0 - c(nc - kk) - wki = c(kk) - xr = a(j) - a(k) - xi = a(j + 1) + a(k + 1) - yr = wkr * xr - wki * xi - yi = wkr * xi + wki * xr - a(j) = a(j) - yr - a(j + 1) = a(j + 1) - yi - a(k) = a(k) + yr - a(k + 1) = a(k + 1) - yi - end do - end -! - subroutine rftbsub(n, a, nc, c) - integer n, nc, j, k, kk, ks, m - real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi - m = n / 2 - ks = 2 * nc / m - kk = 0 - do j = 2, m - 2, 2 - k = n - j - kk = kk + ks - wkr = 0.5d0 - c(nc - kk) - wki = c(kk) - xr = a(j) - a(k) - xi = a(j + 1) + a(k + 1) - yr = wkr * xr + wki * xi - yi = wkr * xi - wki * xr - a(j) = a(j) - yr - a(j + 1) = a(j + 1) - yi - a(k) = a(k) + yr - a(k + 1) = a(k + 1) - yi - end do - end -! - subroutine dctsub(n, a, nc, c) - integer n, nc, j, k, kk, ks, m - real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr - m = n / 2 - ks = nc / n - kk = 0 - do j = 1, m - 1 - k = n - j - kk = kk + ks - wkr = c(kk) - c(nc - kk) - wki = c(kk) + c(nc - kk) - xr = wki * a(j) - wkr * a(k) - a(j) = wkr * a(j) + wki * a(k) - a(k) = xr - end do - a(m) = c(0) * a(m) - end -! - subroutine dstsub(n, a, nc, c) - integer n, nc, j, k, kk, ks, m - real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr - m = n / 2 - ks = nc / n - kk = 0 - do j = 1, m - 1 - k = n - j - kk = kk + ks - wkr = c(kk) - c(nc - kk) - wki = c(kk) + c(nc - kk) - xr = wki * a(k) - wkr * a(j) - a(k) = wkr * a(k) + wki * a(j) - a(j) = xr - end do - a(m) = c(0) * a(m) - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg2d.c b/third_party/tensorflow_dependencies/fft2d/fftsg2d.c deleted file mode 100644 index 2ebfa93..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg2d.c +++ /dev/null
@@ -1,1190 +0,0 @@ -/* -Fast Fourier/Cosine/Sine Transform - dimension :two - data length :power of 2 - decimation :frequency - radix :split-radix, row-column - data :inplace - table :use -functions - cdft2d: Complex Discrete Fourier Transform - rdft2d: Real Discrete Fourier Transform - ddct2d: Discrete Cosine Transform - ddst2d: Discrete Sine Transform -function prototypes - void cdft2d(int, int, int, double **, double *, int *, double *); - void rdft2d(int, int, int, double **, double *, int *, double *); - void rdft2dsort(int, int, int, double **); - void ddct2d(int, int, int, double **, double *, int *, double *); - void ddst2d(int, int, int, double **, double *, int *, double *); -necessary package - fftsg.c : 1D-FFT package -macro definitions - USE_FFT2D_PTHREADS : default=not defined - FFT2D_MAX_THREADS : must be 2^N, default=4 - FFT2D_THREADS_BEGIN_N : default=65536 - USE_FFT2D_WINTHREADS : default=not defined - FFT2D_MAX_THREADS : must be 2^N, default=4 - FFT2D_THREADS_BEGIN_N : default=131072 - - --------- Complex DFT (Discrete Fourier Transform) -------- - [definition] - <case1> - X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * - exp(2*pi*i*j1*k1/n1) * - exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 - <case2> - X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * - exp(-2*pi*i*j1*k1/n1) * - exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2 - (notes: sum_j=0^n-1 is a summation from j=0 to n-1) - [usage] - <case1> - ip[0] = 0; // first time only - cdft2d(n1, 2*n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - cdft2d(n1, 2*n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 1, n1 = power of 2 - 2*n2 :data length (int) - n2 >= 1, n2 = power of 2 - a[0...n1-1][0...2*n2-1] - :input/output data (double **) - input data - a[j1][2*j2] = Re(x[j1][j2]), - a[j1][2*j2+1] = Im(x[j1][j2]), - 0<=j1<n1, 0<=j2<n2 - output data - a[k1][2*k2] = Re(X[k1][k2]), - a[k1][2*k2+1] = Im(X[k1][k2]), - 0<=k1<n1, 0<=k2<n2 - t[0...*] - :work area (double *) - length of t >= 8*n1, if single thread, - length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - cdft2d(n1, 2*n2, -1, a, t, ip, w); - is - cdft2d(n1, 2*n2, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { - a[j1][j2] *= 1.0 / n1 / n2; - } - } - . - - --------- Real DFT / Inverse of Real DFT -------- - [definition] - <case1> RDFT - R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), - 0<=k1<n1, 0<=k2<n2 - I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> IRDFT (excluding scale) - a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 - (R[j1][j2] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + - I[j1][j2] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), - 0<=k1<n1, 0<=k2<n2 - (notes: R[n1-k1][n2-k2] = R[k1][k2], - I[n1-k1][n2-k2] = -I[k1][k2], - R[n1-k1][0] = R[k1][0], - I[n1-k1][0] = -I[k1][0], - R[0][n2-k2] = R[0][k2], - I[0][n2-k2] = -I[0][k2], - 0<k1<n1, 0<k2<n2) - [usage] - <case1> - ip[0] = 0; // first time only - rdft2d(n1, n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - rdft2d(n1, n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - <case1> - output data - a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], - a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], - 0<k1<n1, 0<k2<n2/2, - a[0][2*k2] = R[0][k2] = R[0][n2-k2], - a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2], - 0<k2<n2/2, - a[k1][0] = R[k1][0] = R[n1-k1][0], - a[k1][1] = I[k1][0] = -I[n1-k1][0], - a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2], - a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2], - 0<k1<n1/2, - a[0][0] = R[0][0], - a[0][1] = R[0][n2/2], - a[n1/2][0] = R[n1/2][0], - a[n1/2][1] = R[n1/2][n2/2] - <case2> - input data - a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], - a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], - 0<j1<n1, 0<j2<n2/2, - a[0][2*j2] = R[0][j2] = R[0][n2-j2], - a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2], - 0<j2<n2/2, - a[j1][0] = R[j1][0] = R[n1-j1][0], - a[j1][1] = I[j1][0] = -I[n1-j1][0], - a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2], - a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2], - 0<j1<n1/2, - a[0][0] = R[0][0], - a[0][1] = R[0][n2/2], - a[n1/2][0] = R[n1/2][0], - a[n1/2][1] = R[n1/2][n2/2] - ---- output ordering ---- - rdft2d(n1, n2, 1, a, t, ip, w); - rdft2dsort(n1, n2, 1, a); - // stored data is a[0...n1-1][0...n2+1]: - // a[k1][2*k2] = R[k1][k2], - // a[k1][2*k2+1] = I[k1][k2], - // 0<=k1<n1, 0<=k2<=n2/2. - // the stored data is larger than the input data! - ---- input ordering ---- - rdft2dsort(n1, n2, -1, a); - rdft2d(n1, n2, -1, a, t, ip, w); - t[0...*] - :work area (double *) - length of t >= 8*n1, if single thread, - length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/4) + n2/4 - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - rdft2d(n1, n2, 1, a, t, ip, w); - is - rdft2d(n1, n2, -1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 2.0 / n1 / n2; - } - } - . - - --------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> IDCT (excluding scale) - C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(pi*j1*(k1+1/2)/n1) * - cos(pi*j2*(k2+1/2)/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> DCT - C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - cos(pi*(j1+1/2)*k1/n1) * - cos(pi*(j2+1/2)*k2/n2), - 0<=k1<n1, 0<=k2<n2 - [usage] - <case1> - ip[0] = 0; // first time only - ddct2d(n1, n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddct2d(n1, n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - output data - a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2 - t[0...*] - :work area (double *) - length of t >= 4*n1, if single thread, - length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1/2, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1*3/2, n2*3/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddct2d(n1, n2, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - a[j1][0] *= 0.5; - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[0][j2] *= 0.5; - } - ddct2d(n1, n2, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 4.0 / n1 / n2; - } - } - . - - --------- DST (Discrete Sine Transform) / Inverse of DST -------- - [definition] - <case1> IDST (excluding scale) - S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * - sin(pi*j1*(k1+1/2)/n1) * - sin(pi*j2*(k2+1/2)/n2), - 0<=k1<n1, 0<=k2<n2 - <case2> DST - S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * - sin(pi*(j1+1/2)*k1/n1) * - sin(pi*(j2+1/2)*k2/n2), - 0<k1<=n1, 0<k2<=n2 - [usage] - <case1> - ip[0] = 0; // first time only - ddst2d(n1, n2, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddst2d(n1, n2, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - a[0...n1-1][0...n2-1] - :input/output data (double **) - <case1> - input data - a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2, - a[j1][0] = A[j1][n2], 0<j1<n1, - a[0][j2] = A[n1][j2], 0<j2<n2, - a[0][0] = A[n1][n2] - (i.e. A[j1][j2] = a[j1 % n1][j2 % n2]) - output data - a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2 - <case2> - output data - a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2, - a[k1][0] = S[k1][n2], 0<k1<n1, - a[0][k2] = S[n1][k2], 0<k2<n2, - a[0][0] = S[n1][n2] - (i.e. S[k1][k2] = a[k1 % n1][k2 % n2]) - t[0...*] - :work area (double *) - length of t >= 4*n1, if single thread, - length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1/2, n2/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1*3/2, n2*3/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddst2d(n1, n2, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - a[j1][0] *= 0.5; - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[0][j2] *= 0.5; - } - ddst2d(n1, n2, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] *= 4.0 / n1 / n2; - } - } - . -*/ - - -#include <stdio.h> -#include <stdlib.h> -#define fft2d_alloc_error_check(p) { \ - if ((p) == NULL) { \ - fprintf(stderr, "fft2d memory allocation error\n"); \ - exit(1); \ - } \ -} - - -#ifdef USE_FFT2D_PTHREADS -#define USE_FFT2D_THREADS -#ifndef FFT2D_MAX_THREADS -#define FFT2D_MAX_THREADS 4 -#endif -#ifndef FFT2D_THREADS_BEGIN_N -#define FFT2D_THREADS_BEGIN_N 65536 -#endif -#include <pthread.h> -#define fft2d_thread_t pthread_t -#define fft2d_thread_create(thp,func,argp) { \ - if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ -} -#define fft2d_thread_wait(th) { \ - if (pthread_join(th, NULL) != 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ -} -#endif /* USE_FFT2D_PTHREADS */ - - -#ifdef USE_FFT2D_WINTHREADS -#define USE_FFT2D_THREADS -#ifndef FFT2D_MAX_THREADS -#define FFT2D_MAX_THREADS 4 -#endif -#ifndef FFT2D_THREADS_BEGIN_N -#define FFT2D_THREADS_BEGIN_N 131072 -#endif -#include <windows.h> -#define fft2d_thread_t HANDLE -#define fft2d_thread_create(thp,func,argp) { \ - DWORD thid; \ - *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ - if (*(thp) == 0) { \ - fprintf(stderr, "fft2d thread error\n"); \ - exit(1); \ - } \ -} -#define fft2d_thread_wait(th) { \ - WaitForSingleObject(th, INFINITE); \ - CloseHandle(th); \ -} -#endif /* USE_FFT2D_WINTHREADS */ - - -void cdft2d(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void cdft(int n, int isgn, double *a, int *ip, double *w); - void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w); -#ifdef USE_FFT2D_THREADS - void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, - int *ip, double *w); - void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w); -#endif /* USE_FFT2D_THREADS */ - int n, itnull, nthread, nt, i; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - if (n > (ip[0] << 2)) { - makewt(n >> 2, ip, w); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 8 * nthread * n1; - if (n2 == 4 * nthread) { - nt >>= 1; - } else if (n2 < 4 * nthread) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { - xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); - cdft2d_subth(n1, n2, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - cdft(n2, isgn, a[i], ip, w); - } - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -void rdft2d(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void rdft(int n, int isgn, double *a, int *ip, double *w); - void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w); - void rdft2d_sub(int n1, int n2, int isgn, double **a); -#ifdef USE_FFT2D_THREADS - void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, - int *ip, double *w); - void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; - - n = n1 << 1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n2 > (nc << 2)) { - nc = n2 >> 2; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 8 * nthread * n1; - if (n2 == 4 * nthread) { - nt >>= 1; - } else if (n2 < 4 * nthread) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { - if (isgn < 0) { - rdft2d_sub(n1, n2, isgn, a); - cdft2d_subth(n1, n2, isgn, a, t, ip, w); - } - xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); - if (isgn >= 0) { - cdft2d_subth(n1, n2, isgn, a, t, ip, w); - rdft2d_sub(n1, n2, isgn, a); - } - } else -#endif /* USE_FFT2D_THREADS */ - { - if (isgn < 0) { - rdft2d_sub(n1, n2, isgn, a); - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - } - for (i = 0; i < n1; i++) { - rdft(n2, isgn, a[i], ip, w); - } - if (isgn >= 0) { - cdft2d_sub(n1, n2, isgn, a, t, ip, w); - rdft2d_sub(n1, n2, isgn, a); - } - } - if (itnull != 0) { - free(t); - } -} - - -void rdft2dsort(int n1, int n2, int isgn, double **a) -{ - int n1h, i; - double x, y; - - n1h = n1 >> 1; - if (isgn < 0) { - for (i = n1h + 1; i < n1; i++) { - a[i][0] = a[i][n2 + 1]; - a[i][1] = a[i][n2]; - } - a[0][1] = a[0][n2]; - a[n1h][1] = a[n1h][n2]; - } else { - for (i = n1h + 1; i < n1; i++) { - y = a[i][0]; - x = a[i][1]; - a[i][n2] = x; - a[i][n2 + 1] = y; - a[n1 - i][n2] = x; - a[n1 - i][n2 + 1] = -y; - a[i][0] = a[n1 - i][0]; - a[i][1] = -a[n1 - i][1]; - } - a[0][n2] = a[0][1]; - a[0][n2 + 1] = 0; - a[0][1] = 0; - a[n1h][n2] = a[n1h][1]; - a[n1h][n2 + 1] = 0; - a[n1h][1] = 0; - } -} - - -void ddct2d(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, - int *ip, double *w); - void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; - - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddct(n2, isgn, a[i], ip, w); - } - ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -void ddst2d(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void ddst(int n, int isgn, double *a, int *ip, double *w); - void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w); -#ifdef USE_FFT2D_THREADS - void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, - int *ip, double *w); - void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w); -#endif /* USE_FFT2D_THREADS */ - int n, nw, nc, itnull, nthread, nt, i; - - n = n1; - if (n < n2) { - n = n2; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nthread = 1; -#ifdef USE_FFT2D_THREADS - nthread = FFT2D_MAX_THREADS; -#endif /* USE_FFT2D_THREADS */ - nt = 4 * nthread * n1; - if (n2 == 2 * nthread) { - nt >>= 1; - } else if (n2 < 2 * nthread) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft2d_alloc_error_check(t); - } -#ifdef USE_FFT2D_THREADS - if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { - ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); - ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); - } else -#endif /* USE_FFT2D_THREADS */ - { - for (i = 0; i < n1; i++) { - ddst(n2, isgn, a[i], ip, w); - } - ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -/* -------- child routines -------- */ - - -void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - int i, j; - - if (n2 > 4) { - for (j = 0; j < n2; j += 8) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j]; - t[2 * i + 1] = a[i][j + 1]; - t[2 * n1 + 2 * i] = a[i][j + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j + 3]; - t[4 * n1 + 2 * i] = a[i][j + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j + 5]; - t[6 * n1 + 2 * i] = a[i][j + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j] = t[2 * i]; - a[i][j + 1] = t[2 * i + 1]; - a[i][j + 2] = t[2 * n1 + 2 * i]; - a[i][j + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j + 4] = t[4 * n1 + 2 * i]; - a[i][j + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j + 6] = t[6 * n1 + 2 * i]; - a[i][j + 7] = t[6 * n1 + 2 * i + 1]; - } - } - } else if (n2 == 4) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][0]; - t[2 * i + 1] = a[i][1]; - t[2 * n1 + 2 * i] = a[i][2]; - t[2 * n1 + 2 * i + 1] = a[i][3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][0] = t[2 * i]; - a[i][1] = t[2 * i + 1]; - a[i][2] = t[2 * n1 + 2 * i]; - a[i][3] = t[2 * n1 + 2 * i + 1]; - } - } else if (n2 == 2) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][0]; - t[2 * i + 1] = a[i][1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][0] = t[2 * i]; - a[i][1] = t[2 * i + 1]; - } - } -} - - -void rdft2d_sub(int n1, int n2, int isgn, double **a) -{ - int n1h, i, j; - double xi; - - n1h = n1 >> 1; - if (isgn < 0) { - for (i = 1; i < n1h; i++) { - j = n1 - i; - xi = a[i][0] - a[j][0]; - a[i][0] += a[j][0]; - a[j][0] = xi; - xi = a[j][1] - a[i][1]; - a[i][1] += a[j][1]; - a[j][1] = xi; - } - } else { - for (i = 1; i < n1h; i++) { - j = n1 - i; - a[j][0] = 0.5 * (a[i][0] - a[j][0]); - a[i][0] -= a[j][0]; - a[j][1] = 0.5 * (a[i][1] + a[j][1]); - a[i][1] -= a[j][1]; - } - } -} - - -void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int i, j; - - if (n2 > 2) { - for (j = 0; j < n2; j += 4) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j]; - t[n1 + i] = a[i][j + 1]; - t[2 * n1 + i] = a[i][j + 2]; - t[3 * n1 + i] = a[i][j + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j] = t[i]; - a[i][j + 1] = t[n1 + i]; - a[i][j + 2] = t[2 * n1 + i]; - a[i][j + 3] = t[3 * n1 + i]; - } - } - } else if (n2 == 2) { - for (i = 0; i < n1; i++) { - t[i] = a[i][0]; - t[n1 + i] = a[i][1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][0] = t[i]; - a[i][1] = t[n1 + i]; - } - } -} - - -#ifdef USE_FFT2D_THREADS -struct fft2d_arg_st { - int nthread; - int n0; - int n1; - int n2; - int ic; - int isgn; - double **a; - double *t; - int *ip; - double *w; -}; -typedef struct fft2d_arg_st fft2d_arg_t; - - -void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, - int *ip, double *w) -{ - void *xdft2d0_th(void *p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, i; - - nthread = FFT2D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = icr; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } -} - - -void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, - int *ip, double *w) -{ - void *cdft2d_th(void *p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT2D_MAX_THREADS; - nt = 8 * n1; - if (n2 == 4 * FFT2D_MAX_THREADS) { - nt >>= 1; - } else if (n2 < 4 * FFT2D_MAX_THREADS) { - nthread = n2 >> 1; - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } -} - - -void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, - int *ip, double *w) -{ - void *ddxt2d0_th(void *p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, i; - - nthread = FFT2D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } -} - - -void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, - double *t, int *ip, double *w) -{ - void *ddxt2d_th(void *p); - fft2d_thread_t th[FFT2D_MAX_THREADS]; - fft2d_arg_t ag[FFT2D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT2D_MAX_THREADS; - nt = 4 * n1; - if (n2 == 2 * FFT2D_MAX_THREADS) { - nt >>= 1; - } else if (n2 < 2 * FFT2D_MAX_THREADS) { - nthread = n2; - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft2d_thread_wait(th[i]); - } -} - - -void *xdft2d0_th(void *p) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - void rdft(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, icr, isgn, *ip, i; - double **a, *w; - - nthread = ((fft2d_arg_t *) p)->nthread; - n0 = ((fft2d_arg_t *) p)->n0; - n1 = ((fft2d_arg_t *) p)->n1; - n2 = ((fft2d_arg_t *) p)->n2; - icr = ((fft2d_arg_t *) p)->ic; - isgn = ((fft2d_arg_t *) p)->isgn; - a = ((fft2d_arg_t *) p)->a; - ip = ((fft2d_arg_t *) p)->ip; - w = ((fft2d_arg_t *) p)->w; - if (icr == 0) { - for (i = n0; i < n1; i += nthread) { - cdft(n2, isgn, a[i], ip, w); - } - } else { - for (i = n0; i < n1; i += nthread) { - rdft(n2, isgn, a[i], ip, w); - } - } - return (void *) 0; -} - - -void *cdft2d_th(void *p) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, isgn, *ip, i, j; - double **a, *t, *w; - - nthread = ((fft2d_arg_t *) p)->nthread; - n0 = ((fft2d_arg_t *) p)->n0; - n1 = ((fft2d_arg_t *) p)->n1; - n2 = ((fft2d_arg_t *) p)->n2; - isgn = ((fft2d_arg_t *) p)->isgn; - a = ((fft2d_arg_t *) p)->a; - t = ((fft2d_arg_t *) p)->t; - ip = ((fft2d_arg_t *) p)->ip; - w = ((fft2d_arg_t *) p)->w; - if (n2 > 4 * nthread) { - for (j = 8 * n0; j < n2; j += 8 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j]; - t[2 * i + 1] = a[i][j + 1]; - t[2 * n1 + 2 * i] = a[i][j + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j + 3]; - t[4 * n1 + 2 * i] = a[i][j + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j + 5]; - t[6 * n1 + 2 * i] = a[i][j + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j] = t[2 * i]; - a[i][j + 1] = t[2 * i + 1]; - a[i][j + 2] = t[2 * n1 + 2 * i]; - a[i][j + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j + 4] = t[4 * n1 + 2 * i]; - a[i][j + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j + 6] = t[6 * n1 + 2 * i]; - a[i][j + 7] = t[6 * n1 + 2 * i + 1]; - } - } - } else if (n2 == 4 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][4 * n0]; - t[2 * i + 1] = a[i][4 * n0 + 1]; - t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; - t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][4 * n0] = t[2 * i]; - a[i][4 * n0 + 1] = t[2 * i + 1]; - a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; - a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; - } - } else if (n2 == 2 * nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][2 * n0]; - t[2 * i + 1] = a[i][2 * n0 + 1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][2 * n0] = t[2 * i]; - a[i][2 * n0 + 1] = t[2 * i + 1]; - } - } - return (void *) 0; -} - - -void *ddxt2d0_th(void *p) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, ics, isgn, *ip, i; - double **a, *w; - - nthread = ((fft2d_arg_t *) p)->nthread; - n0 = ((fft2d_arg_t *) p)->n0; - n1 = ((fft2d_arg_t *) p)->n1; - n2 = ((fft2d_arg_t *) p)->n2; - ics = ((fft2d_arg_t *) p)->ic; - isgn = ((fft2d_arg_t *) p)->isgn; - a = ((fft2d_arg_t *) p)->a; - ip = ((fft2d_arg_t *) p)->ip; - w = ((fft2d_arg_t *) p)->w; - if (ics == 0) { - for (i = n0; i < n1; i += nthread) { - ddct(n2, isgn, a[i], ip, w); - } - } else { - for (i = n0; i < n1; i += nthread) { - ddst(n2, isgn, a[i], ip, w); - } - } - return (void *) 0; -} - - -void *ddxt2d_th(void *p) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, ics, isgn, *ip, i, j; - double **a, *t, *w; - - nthread = ((fft2d_arg_t *) p)->nthread; - n0 = ((fft2d_arg_t *) p)->n0; - n1 = ((fft2d_arg_t *) p)->n1; - n2 = ((fft2d_arg_t *) p)->n2; - ics = ((fft2d_arg_t *) p)->ic; - isgn = ((fft2d_arg_t *) p)->isgn; - a = ((fft2d_arg_t *) p)->a; - t = ((fft2d_arg_t *) p)->t; - ip = ((fft2d_arg_t *) p)->ip; - w = ((fft2d_arg_t *) p)->w; - if (n2 > 2 * nthread) { - for (j = 4 * n0; j < n2; j += 4 * nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j]; - t[n1 + i] = a[i][j + 1]; - t[2 * n1 + i] = a[i][j + 2]; - t[3 * n1 + i] = a[i][j + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j] = t[i]; - a[i][j + 1] = t[n1 + i]; - a[i][j + 2] = t[2 * n1 + i]; - a[i][j + 3] = t[3 * n1 + i]; - } - } - } else if (n2 == 2 * nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][2 * n0]; - t[n1 + i] = a[i][2 * n0 + 1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][2 * n0] = t[i]; - a[i][2 * n0 + 1] = t[n1 + i]; - } - } else if (n2 == nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][n0]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - } else { - ddst(n1, isgn, t, ip, w); - } - for (i = 0; i < n1; i++) { - a[i][n0] = t[i]; - } - } - return (void *) 0; -} -#endif /* USE_FFT2D_THREADS */ -
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg2d.f b/third_party/tensorflow_dependencies/fft2d/fftsg2d.f deleted file mode 100644 index 49a9d6c..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg2d.f +++ /dev/null
@@ -1,562 +0,0 @@ -! Fast Fourier/Cosine/Sine Transform -! dimension :two -! data length :power of 2 -! decimation :frequency -! radix :split-radix, row-column -! data :inplace -! table :use -! subroutines -! cdft2d: Complex Discrete Fourier Transform -! rdft2d: Real Discrete Fourier Transform -! ddct2d: Discrete Cosine Transform -! ddst2d: Discrete Sine Transform -! necessary package -! fftsg.f : 1D-FFT package -! -! -! -------- Complex DFT (Discrete Fourier Transform) -------- -! [definition] -! <case1> -! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * -! exp(2*pi*i*j1*k1/n1) * -! exp(2*pi*i*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> -! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * -! exp(-2*pi*i*j1*k1/n1) * -! exp(-2*pi*i*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! 2*n1 :data length (integer) -! n1 >= 1, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 1, n2 = power of 2 -! a(0:2*n1-1,0:n2-1) -! :input/output data (real*8) -! input data -! a(2*j1,j2) = Re(x(j1,j2)), -! a(2*j1+1,j2) = Im(x(j1,j2)), -! 0<=j1<n1, 0<=j2<n2 -! output data -! a(2*k1,k2) = Re(X(k1,k2)), -! a(2*k1+1,k2) = Im(X(k1,k2)), -! 0<=k1<n1, 0<=k2<n2 -! t(0:8*n2-1) -! :work area (real*8) -! length of t >= 8*n2 -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/2, n2/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) -! is -! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, 2 * n1 - 1 -! a(j1, j2) = a(j1, j2) * (1.0d0 / n1 / n2) -! end do -! end do -! . -! -! -! -------- Real DFT / Inverse of Real DFT -------- -! [definition] -! <case1> RDFT -! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! I(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> IRDFT (excluding scale) -! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 -! (R(j1,j2) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + -! I(j1,j2) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), -! 0<=k1<n1, 0<=k2<n2 -! (notes: R(n1-k1,n2-k2) = R(k1,k2), -! I(n1-k1,n2-k2) = -I(k1,k2), -! R(n1-k1,0) = R(k1,0), -! I(n1-k1,0) = -I(k1,0), -! R(0,n2-k2) = R(0,k2), -! I(0,n2-k2) = -I(0,k2), -! 0<k1<n1, 0<k2<n2) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! <case1> -! output data -! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), -! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), -! 0<k1<n1/2, 0<k2<n2, -! a(2*k1,0) = R(k1,0) = R(n1-k1,0), -! a(2*k1+1,0) = I(k1,0) = -I(n1-k1,0), -! 0<k1<n1/2, -! a(0,k2) = R(0,k2) = R(0,n2-k2), -! a(1,k2) = I(0,k2) = -I(0,n2-k2), -! a(1,n2-k2) = R(n1/2,k2) = R(n1/2,n2-k2), -! a(0,n2-k2) = -I(n1/2,k2) = I(n1/2,n2-k2), -! 0<k2<n2/2, -! a(0,0) = R(0,0), -! a(1,0) = R(n1/2,0), -! a(0,n2/2) = R(0,n2/2), -! a(1,n2/2) = R(n1/2,n2/2) -! <case2> -! input data -! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), -! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), -! 0<j1<n1/2, 0<j2<n2, -! a(2*j1,0) = R(j1,0) = R(n1-j1,0), -! a(2*j1+1,0) = I(j1,0) = -I(n1-j1,0), -! 0<j1<n1/2, -! a(0,j2) = R(0,j2) = R(0,n2-j2), -! a(1,j2) = I(0,j2) = -I(0,n2-j2), -! a(1,n2-j2) = R(n1/2,j2) = R(n1/2,n2-j2), -! a(0,n2-j2) = -I(n1/2,j2) = I(n1/2,n2-j2), -! 0<j2<n2/2, -! a(0,0) = R(0,0), -! a(1,0) = R(n1/2,0), -! a(0,n2/2) = R(0,n2/2), -! a(1,n2/2) = R(n1/2,n2/2) -! ---- output ordering ---- -! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) -! call rdft2dsort(n1max, n1, n2, 1, a) -! ! stored data is a(0:n1-1,0:n2+1): -! ! a(2*k1,k2) = R(k1,k2), -! ! a(2*k1+1,k2) = I(k1,k2), -! ! 0<=k1<=n1/2, 0<=k2<n2. -! ! the stored data is larger than the input data! -! ---- input ordering ---- -! call rdft2dsort(n1max, n1, n2, -1, a) -! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) -! t(0:8*n2-1) -! :work area (real*8) -! length of t >= 8*n2 -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/4, n2/2) + n1/4 -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) -! is -! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (2.0d0 / n1 / n2) -! end do -! end do -! . -! -! -! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- -! [definition] -! <case1> IDCT (excluding scale) -! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(pi*j1*(k1+1/2)/n1) * -! cos(pi*j2*(k2+1/2)/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> DCT -! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! cos(pi*(j1+1/2)*k1/n1) * -! cos(pi*(j2+1/2)*k2/n2), -! 0<=k1<n1, 0<=k2<n2 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! output data -! a(k1,k2) = C(k1,k2), 0<=k1<n1, 0<=k2<n2 -! t(0:4*n2-1) -! :work area (real*8) -! length of t >= 4*n2 -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2/2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1*3/2, n2*3/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) -! is -! do j1 = 0, n1 - 1 -! a(j1, 0) = a(j1, 0) * 0.5d0 -! end do -! do j2 = 0, n2 - 1 -! a(0, j2) = a(0, j2) * 0.5d0 -! end do -! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) -! end do -! end do -! . -! -! -! -------- DST (Discrete Sine Transform) / Inverse of DST -------- -! [definition] -! <case1> IDST (excluding scale) -! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * -! sin(pi*j1*(k1+1/2)/n1) * -! sin(pi*j2*(k2+1/2)/n2), -! 0<=k1<n1, 0<=k2<n2 -! <case2> DST -! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * -! sin(pi*(j1+1/2)*k1/n1) * -! sin(pi*(j2+1/2)*k2/n2), -! 0<k1<=n1, 0<k2<=n2 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) -! [parameters] -! n1max :row size of the 2D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! a(0:n1-1,0:n2-1) -! :input/output data (real*8) -! <case1> -! input data -! a(j1,j2) = A(j1,j2), 0<j1<n1, 0<j2<n2, -! a(j1,0) = A(j1,n2), 0<j1<n1, -! a(0,j2) = A(n1,j2), 0<j2<n2, -! a(0,0) = A(n1,n2) -! (i.e. A(j1,j2) = a(mod(j1,n1),mod(j2,n2))) -! output data -! a(k1,k2) = S(k1,k2), 0<=k1<n1, 0<=k2<n2 -! <case2> -! output data -! a(k1,k2) = S(k1,k2), 0<k1<n1, 0<k2<n2, -! a(k1,0) = S(k1,n2), 0<k1<n1, -! a(0,k2) = S(n1,k2), 0<k2<n2, -! a(0,0) = S(n1,n2) -! (i.e. S(k1,k2) = a(mod(k1,n1),mod(k2,n2))) -! t(0:4*n2-1) -! :work area (real*8) -! length of t >= 4*n2 -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2/2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1*3/2, n2*3/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) -! is -! do j1 = 0, n1 - 1 -! a(j1, 0) = a(j1, 0) * 0.5d0 -! end do -! do j2 = 0, n2 - 1 -! a(0, j2) = a(0, j2) * 0.5d0 -! end do -! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) -! end do -! end do -! . -! -! - subroutine cdft2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), - & w(0 : *) - n = max(n1, 2 * n2) - if (n .gt. 4 * ip(0)) then - call makewt(n / 4, ip, w) - end if - do j = 0, n2 - 1 - call cdft(n1, isgn, a(0, j), ip, w) - end do - call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) - end -! - subroutine rdft2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), - & w(0 : *) - n = max(n1, 2 * n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n1 .gt. 4 * nc) then - nc = n1 / 4 - call makect(nc, ip, w(nw)) - end if - if (isgn .lt. 0) then - call rdft2d_sub(n1max, n1, n2, isgn, a) - call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) - end if - do j = 0, n2 - 1 - call rdft(n1, isgn, a(0, j), ip, w) - end do - if (isgn .ge. 0) then - call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) - call rdft2d_sub(n1max, n1, n2, isgn, a) - end if - end -! - subroutine rdft2dsort(n1max, n1, n2, isgn, a) - integer n1max, n1, n2, isgn, n2h, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), x, y - n2h = n2 / 2 - if (isgn .lt. 0) then - do j = n2h + 1, n2 - 1 - a(0, j) = a(n1 + 1, j) - a(1, j) = a(n1, j) - end do - a(1, 0) = a(n1, 0) - a(1, n2h) = a(n1, n2h) - else - do j = n2h + 1, n2 - 1 - y = a(0, j) - x = a(1, j) - a(n1, j) = x - a(n1 + 1, j) = y - a(n1, n2 - j) = x - a(n1 + 1, n2 - j) = -y - a(0, j) = a(0, n2 - j) - a(1, j) = -a(1, n2 - j) - end do - a(n1, 0) = a(1, 0) - a(n1 + 1, 0) = 0 - a(1, 0) = 0 - a(n1, n2h) = a(1, n2h) - a(n1 + 1, n2h) = 0 - a(1, n2h) = 0 - end if - end -! - subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), - & w(0 : *) - n = max(n1, n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - do j = 0, n2 - 1 - call ddct(n1, isgn, a(0, j), ip, w) - end do - call ddxt2d_sub(n1max, n1, n2, 0, isgn, a, t, ip, w) - end -! - subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), - & w(0 : *) - n = max(n1, n2) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - do j = 0, n2 - 1 - call ddst(n1, isgn, a(0, j), ip, w) - end do - call ddxt2d_sub(n1max, n1, n2, 1, isgn, a, t, ip, w) - end -! -! -------- child routines -------- -! - subroutine cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) - integer n1max, n1, n2, isgn, ip(0 : *), i, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), - & w(0 : *) - if (n1 .gt. 4) then - do i = 0, n1 - 8, 8 - do j = 0, n2 - 1 - t(2 * j) = a(i, j) - t(2 * j + 1) = a(i + 1, j) - t(2 * n2 + 2 * j) = a(i + 2, j) - t(2 * n2 + 2 * j + 1) = a(i + 3, j) - t(4 * n2 + 2 * j) = a(i + 4, j) - t(4 * n2 + 2 * j + 1) = a(i + 5, j) - t(6 * n2 + 2 * j) = a(i + 6, j) - t(6 * n2 + 2 * j + 1) = a(i + 7, j) - end do - call cdft(2 * n2, isgn, t, ip, w) - call cdft(2 * n2, isgn, t(2 * n2), ip, w) - call cdft(2 * n2, isgn, t(4 * n2), ip, w) - call cdft(2 * n2, isgn, t(6 * n2), ip, w) - do j = 0, n2 - 1 - a(i, j) = t(2 * j) - a(i + 1, j) = t(2 * j + 1) - a(i + 2, j) = t(2 * n2 + 2 * j) - a(i + 3, j) = t(2 * n2 + 2 * j + 1) - a(i + 4, j) = t(4 * n2 + 2 * j) - a(i + 5, j) = t(4 * n2 + 2 * j + 1) - a(i + 6, j) = t(6 * n2 + 2 * j) - a(i + 7, j) = t(6 * n2 + 2 * j + 1) - end do - end do - else if (n1 .eq. 4) then - do j = 0, n2 - 1 - t(2 * j) = a(0, j) - t(2 * j + 1) = a(1, j) - t(2 * n2 + 2 * j) = a(2, j) - t(2 * n2 + 2 * j + 1) = a(3, j) - end do - call cdft(2 * n2, isgn, t, ip, w) - call cdft(2 * n2, isgn, t(2 * n2), ip, w) - do j = 0, n2 - 1 - a(0, j) = t(2 * j) - a(1, j) = t(2 * j + 1) - a(2, j) = t(2 * n2 + 2 * j) - a(3, j) = t(2 * n2 + 2 * j + 1) - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - t(2 * j) = a(0, j) - t(2 * j + 1) = a(1, j) - end do - call cdft(2 * n2, isgn, t, ip, w) - do j = 0, n2 - 1 - a(0, j) = t(2 * j) - a(1, j) = t(2 * j + 1) - end do - end if - end -! - subroutine rdft2d_sub(n1max, n1, n2, isgn, a) - integer n1max, n1, n2, isgn, n2h, i, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), xi - n2h = n2 / 2 - if (isgn .lt. 0) then - do i = 1, n2h - 1 - j = n2 - i - xi = a(0, i) - a(0, j) - a(0, i) = a(0, i) + a(0, j) - a(0, j) = xi - xi = a(1, j) - a(1, i) - a(1, i) = a(1, i) + a(1, j) - a(1, j) = xi - end do - else - do i = 1, n2h - 1 - j = n2 - i - a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) - a(0, i) = a(0, i) - a(0, j) - a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) - a(1, i) = a(1, i) - a(1, j) - end do - end if - end -! - subroutine ddxt2d_sub(n1max, n1, n2, ics, isgn, a, t, - & ip, w) - integer n1max, n1, n2, ics, isgn, ip(0 : *), i, j - real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), - & w(0 : *) - if (n1 .gt. 2) then - do i = 0, n1 - 4, 4 - do j = 0, n2 - 1 - t(j) = a(i, j) - t(n2 + j) = a(i + 1, j) - t(2 * n2 + j) = a(i + 2, j) - t(3 * n2 + j) = a(i + 3, j) - end do - if (ics .eq. 0) then - call ddct(n2, isgn, t, ip, w) - call ddct(n2, isgn, t(n2), ip, w) - call ddct(n2, isgn, t(2 * n2), ip, w) - call ddct(n2, isgn, t(3 * n2), ip, w) - else - call ddst(n2, isgn, t, ip, w) - call ddst(n2, isgn, t(n2), ip, w) - call ddst(n2, isgn, t(2 * n2), ip, w) - call ddst(n2, isgn, t(3 * n2), ip, w) - end if - do j = 0, n2 - 1 - a(i, j) = t(j) - a(i + 1, j) = t(n2 + j) - a(i + 2, j) = t(2 * n2 + j) - a(i + 3, j) = t(3 * n2 + j) - end do - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - t(j) = a(0, j) - t(n2 + j) = a(1, j) - end do - if (ics .eq. 0) then - call ddct(n2, isgn, t, ip, w) - call ddct(n2, isgn, t(n2), ip, w) - else - call ddst(n2, isgn, t, ip, w) - call ddst(n2, isgn, t(n2), ip, w) - end if - do j = 0, n2 - 1 - a(0, j) = t(j) - a(1, j) = t(n2 + j) - end do - end if - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg3d.c b/third_party/tensorflow_dependencies/fft2d/fftsg3d.c deleted file mode 100644 index 62f7daa..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg3d.c +++ /dev/null
@@ -1,1695 +0,0 @@ -/* -Fast Fourier/Cosine/Sine Transform - dimension :three - data length :power of 2 - decimation :frequency - radix :split-radix, row-column - data :inplace - table :use -functions - cdft3d: Complex Discrete Fourier Transform - rdft3d: Real Discrete Fourier Transform - ddct3d: Discrete Cosine Transform - ddst3d: Discrete Sine Transform -function prototypes - void cdft3d(int, int, int, int, double ***, double *, int *, double *); - void rdft3d(int, int, int, int, double ***, double *, int *, double *); - void rdft3dsort(int, int, int, int, double ***); - void ddct3d(int, int, int, int, double ***, double *, int *, double *); - void ddst3d(int, int, int, int, double ***, double *, int *, double *); -necessary package - fftsg.c : 1D-FFT package -macro definitions - USE_FFT3D_PTHREADS : default=not defined - FFT3D_MAX_THREADS : must be 2^N, default=4 - FFT3D_THREADS_BEGIN_N : default=65536 - USE_FFT3D_WINTHREADS : default=not defined - FFT3D_MAX_THREADS : must be 2^N, default=4 - FFT3D_THREADS_BEGIN_N : default=131072 - - --------- Complex DFT (Discrete Fourier Transform) -------- - [definition] - <case1> - X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - x[j1][j2][j3] * - exp(2*pi*i*j1*k1/n1) * - exp(2*pi*i*j2*k2/n2) * - exp(2*pi*i*j3*k3/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - <case2> - X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - x[j1][j2][j3] * - exp(-2*pi*i*j1*k1/n1) * - exp(-2*pi*i*j2*k2/n2) * - exp(-2*pi*i*j3*k3/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - (notes: sum_j=0^n-1 is a summation from j=0 to n-1) - [usage] - <case1> - ip[0] = 0; // first time only - cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 1, n1 = power of 2 - n2 :data length (int) - n2 >= 1, n2 = power of 2 - 2*n3 :data length (int) - n3 >= 1, n3 = power of 2 - a[0...n1-1][0...n2-1][0...2*n3-1] - :input/output data (double ***) - input data - a[j1][j2][2*j3] = Re(x[j1][j2][j3]), - a[j1][j2][2*j3+1] = Im(x[j1][j2][j3]), - 0<=j1<n1, 0<=j2<n2, 0<=j3<n3 - output data - a[k1][k2][2*k3] = Re(X[k1][k2][k3]), - a[k1][k2][2*k3+1] = Im(X[k1][k2][k3]), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - t[0...*] - :work area (double *) - length of t >= max(8*n1, 8*n2), if single thread, - length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, - if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2, n3)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/2, n3/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); - is - cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= 2 * n3 - 1; j3++) { - a[j1][j2][j3] *= 1.0 / n1 / n2 / n3; - } - } - } - . - - --------- Real DFT / Inverse of Real DFT -------- - [definition] - <case1> RDFT - R[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - a[j1][j2][j3] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + - 2*pi*j3*k3/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - I[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - a[j1][j2][j3] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + - 2*pi*j3*k3/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - <case2> IRDFT (excluding scale) - a[k1][k2][k3] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - (R[j1][j2][j3] * - cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + - 2*pi*j3*k3/n3) + - I[j1][j2][j3] * - sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + - 2*pi*j3*k3/n3)), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - (notes: R[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = R[k1][k2][k3], - I[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = -I[k1][k2][k3], - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3) - [usage] - <case1> - ip[0] = 0; // first time only - rdft3d(n1, n2, n3, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - rdft3d(n1, n2, n3, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - n3 :data length (int) - n3 >= 2, n3 = power of 2 - a[0...n1-1][0...n2-1][0...n3-1] - :input/output data (double ***) - <case1> - output data - a[k1][k2][2*k3] = R[k1][k2][k3] - = R[(n1-k1)%n1][(n2-k2)%n2][n3-k3], - a[k1][k2][2*k3+1] = I[k1][k2][k3] - = -I[(n1-k1)%n1][(n2-k2)%n2][n3-k3], - 0<=k1<n1, 0<=k2<n2, 0<k3<n3/2, - (n%m : n mod m), - a[k1][k2][0] = R[k1][k2][0] - = R[(n1-k1)%n1][n2-k2][0], - a[k1][k2][1] = I[k1][k2][0] - = -I[(n1-k1)%n1][n2-k2][0], - a[k1][n2-k2][1] = R[(n1-k1)%n1][k2][n3/2] - = R[k1][n2-k2][n3/2], - a[k1][n2-k2][0] = -I[(n1-k1)%n1][k2][n3/2] - = I[k1][n2-k2][n3/2], - 0<=k1<n1, 0<k2<n2/2, - a[k1][0][0] = R[k1][0][0] - = R[n1-k1][0][0], - a[k1][0][1] = I[k1][0][0] - = -I[n1-k1][0][0], - a[k1][n2/2][0] = R[k1][n2/2][0] - = R[n1-k1][n2/2][0], - a[k1][n2/2][1] = I[k1][n2/2][0] - = -I[n1-k1][n2/2][0], - a[n1-k1][0][1] = R[k1][0][n3/2] - = R[n1-k1][0][n3/2], - a[n1-k1][0][0] = -I[k1][0][n3/2] - = I[n1-k1][0][n3/2], - a[n1-k1][n2/2][1] = R[k1][n2/2][n3/2] - = R[n1-k1][n2/2][n3/2], - a[n1-k1][n2/2][0] = -I[k1][n2/2][n3/2] - = I[n1-k1][n2/2][n3/2], - 0<k1<n1/2, - a[0][0][0] = R[0][0][0], - a[0][0][1] = R[0][0][n3/2], - a[0][n2/2][0] = R[0][n2/2][0], - a[0][n2/2][1] = R[0][n2/2][n3/2], - a[n1/2][0][0] = R[n1/2][0][0], - a[n1/2][0][1] = R[n1/2][0][n3/2], - a[n1/2][n2/2][0] = R[n1/2][n2/2][0], - a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2] - <case2> - input data - a[j1][j2][2*j3] = R[j1][j2][j3] - = R[(n1-j1)%n1][(n2-j2)%n2][n3-j3], - a[j1][j2][2*j3+1] = I[j1][j2][j3] - = -I[(n1-j1)%n1][(n2-j2)%n2][n3-j3], - 0<=j1<n1, 0<=j2<n2, 0<j3<n3/2, - a[j1][j2][0] = R[j1][j2][0] - = R[(n1-j1)%n1][n2-j2][0], - a[j1][j2][1] = I[j1][j2][0] - = -I[(n1-j1)%n1][n2-j2][0], - a[j1][n2-j2][1] = R[(n1-j1)%n1][j2][n3/2] - = R[j1][n2-j2][n3/2], - a[j1][n2-j2][0] = -I[(n1-j1)%n1][j2][n3/2] - = I[j1][n2-j2][n3/2], - 0<=j1<n1, 0<j2<n2/2, - a[j1][0][0] = R[j1][0][0] - = R[n1-j1][0][0], - a[j1][0][1] = I[j1][0][0] - = -I[n1-j1][0][0], - a[j1][n2/2][0] = R[j1][n2/2][0] - = R[n1-j1][n2/2][0], - a[j1][n2/2][1] = I[j1][n2/2][0] - = -I[n1-j1][n2/2][0], - a[n1-j1][0][1] = R[j1][0][n3/2] - = R[n1-j1][0][n3/2], - a[n1-j1][0][0] = -I[j1][0][n3/2] - = I[n1-j1][0][n3/2], - a[n1-j1][n2/2][1] = R[j1][n2/2][n3/2] - = R[n1-j1][n2/2][n3/2], - a[n1-j1][n2/2][0] = -I[j1][n2/2][n3/2] - = I[n1-j1][n2/2][n3/2], - 0<j1<n1/2, - a[0][0][0] = R[0][0][0], - a[0][0][1] = R[0][0][n3/2], - a[0][n2/2][0] = R[0][n2/2][0], - a[0][n2/2][1] = R[0][n2/2][n3/2], - a[n1/2][0][0] = R[n1/2][0][0], - a[n1/2][0][1] = R[n1/2][0][n3/2], - a[n1/2][n2/2][0] = R[n1/2][n2/2][0], - a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2] - ---- output ordering ---- - rdft3d(n1, n2, n3, 1, a, t, ip, w); - rdft3dsort(n1, n2, n3, 1, a); - // stored data is a[0...n1-1][0...n2-1][0...n3+1]: - // a[k1][k2][2*k3] = R[k1][k2][k3], - // a[k1][k2][2*k3+1] = I[k1][k2][k3], - // 0<=k1<n1, 0<=k2<n2, 0<=k3<=n3/2. - // the stored data is larger than the input data! - ---- input ordering ---- - rdft3dsort(n1, n2, n3, -1, a); - rdft3d(n1, n2, n3, -1, a, t, ip, w); - t[0...*] - :work area (double *) - length of t >= max(8*n1, 8*n2), if single thread, - length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, - if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1, n2, n3/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1/2, n2/2, n3/4) + n3/4 - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - rdft3d(n1, n2, n3, 1, a, t, ip, w); - is - rdft3d(n1, n2, n3, -1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][j2][j3] *= 2.0 / n1 / n2 / n3; - } - } - } - . - - --------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> IDCT (excluding scale) - C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - a[j1][j2][j3] * - cos(pi*j1*(k1+1/2)/n1) * - cos(pi*j2*(k2+1/2)/n2) * - cos(pi*j3*(k3+1/2)/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - <case2> DCT - C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - a[j1][j2][j3] * - cos(pi*(j1+1/2)*k1/n1) * - cos(pi*(j2+1/2)*k2/n2) * - cos(pi*(j3+1/2)*k3/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - [usage] - <case1> - ip[0] = 0; // first time only - ddct3d(n1, n2, n3, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddct3d(n1, n2, n3, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - n3 :data length (int) - n3 >= 2, n3 = power of 2 - a[0...n1-1][0...n2-1][0...n3-1] - :input/output data (double ***) - output data - a[k1][k2][k3] = C[k1][k2][k3], - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - t[0...*] - :work area (double *) - length of t >= max(4*n1, 4*n2), if single thread, - length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, - if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1/2, n2/2, n3/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1*3/2, n2*3/2, n3*3/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddct3d(n1, n2, n3, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2][0] *= 0.5; - } - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][0][j3] *= 0.5; - } - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[0][j2][j3] *= 0.5; - } - } - ddct3d(n1, n2, n3, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; - } - } - } - . - - --------- DST (Discrete Sine Transform) / Inverse of DST -------- - [definition] - <case1> IDST (excluding scale) - S[k1][k2][k3] = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 - A[j1][j2][j3] * - sin(pi*j1*(k1+1/2)/n1) * - sin(pi*j2*(k2+1/2)/n2) * - sin(pi*j3*(k3+1/2)/n3), - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - <case2> DST - S[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 - a[j1][j2][j3] * - sin(pi*(j1+1/2)*k1/n1) * - sin(pi*(j2+1/2)*k2/n2) * - sin(pi*(j3+1/2)*k3/n3), - 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 - [usage] - <case1> - ip[0] = 0; // first time only - ddst3d(n1, n2, n3, 1, a, t, ip, w); - <case2> - ip[0] = 0; // first time only - ddst3d(n1, n2, n3, -1, a, t, ip, w); - [parameters] - n1 :data length (int) - n1 >= 2, n1 = power of 2 - n2 :data length (int) - n2 >= 2, n2 = power of 2 - n3 :data length (int) - n3 >= 2, n3 = power of 2 - a[0...n1-1][0...n2-1][0...n3-1] - :input/output data (double ***) - <case1> - input data - a[j1%n1][j2%n2][j3%n3] = A[j1][j2][j3], - 0<j1<=n1, 0<j2<=n2, 0<j3<=n3, - (n%m : n mod m) - output data - a[k1][k2][k3] = S[k1][k2][k3], - 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 - <case2> - output data - a[k1%n1][k2%n2][k3%n3] = S[k1][k2][k3], - 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 - t[0...*] - :work area (double *) - length of t >= max(4*n1, 4*n2), if single thread, - length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, - if multi threads, - t is dynamically allocated, if t == NULL. - ip[0...*] - :work area for bit reversal (int *) - length of ip >= 2+sqrt(n) - (n = max(n1/2, n2/2, n3/2)) - ip[0],ip[1] are pointers of the cos/sin table. - w[0...*] - :cos/sin table (double *) - length of w >= max(n1*3/2, n2*3/2, n3*3/2) - w[],ip[] are initialized if ip[0] == 0. - [remark] - Inverse of - ddst3d(n1, n2, n3, -1, a, t, ip, w); - is - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2][0] *= 0.5; - } - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][0][j3] *= 0.5; - } - } - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[0][j2][j3] *= 0.5; - } - } - ddst3d(n1, n2, n3, 1, a, t, ip, w); - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; - } - } - } - . -*/ - - -#include <stdio.h> -#include <stdlib.h> -#define fft3d_alloc_error_check(p) { \ - if ((p) == NULL) { \ - fprintf(stderr, "fft3d memory allocation error\n"); \ - exit(1); \ - } \ -} - - -#ifdef USE_FFT3D_PTHREADS -#define USE_FFT3D_THREADS -#ifndef FFT3D_MAX_THREADS -#define FFT3D_MAX_THREADS 4 -#endif -#ifndef FFT3D_THREADS_BEGIN_N -#define FFT3D_THREADS_BEGIN_N 65536 -#endif -#include <pthread.h> -#define fft3d_thread_t pthread_t -#define fft3d_thread_create(thp,func,argp) { \ - if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ - fprintf(stderr, "fft3d thread error\n"); \ - exit(1); \ - } \ -} -#define fft3d_thread_wait(th) { \ - if (pthread_join(th, NULL) != 0) { \ - fprintf(stderr, "fft3d thread error\n"); \ - exit(1); \ - } \ -} -#endif /* USE_FFT3D_PTHREADS */ - - -#ifdef USE_FFT3D_WINTHREADS -#define USE_FFT3D_THREADS -#ifndef FFT3D_MAX_THREADS -#define FFT3D_MAX_THREADS 4 -#endif -#ifndef FFT3D_THREADS_BEGIN_N -#define FFT3D_THREADS_BEGIN_N 131072 -#endif -#include <windows.h> -#define fft3d_thread_t HANDLE -#define fft3d_thread_create(thp,func,argp) { \ - DWORD thid; \ - *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ - if (*(thp) == 0) { \ - fprintf(stderr, "fft3d thread error\n"); \ - exit(1); \ - } \ -} -#define fft3d_thread_wait(th) { \ - WaitForSingleObject(th, INFINITE); \ - CloseHandle(th); \ -} -#endif /* USE_FFT3D_WINTHREADS */ - - -void cdft3d(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w); - void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w); -#ifdef USE_FFT3D_THREADS - void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w); - void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w); -#endif /* USE_FFT3D_THREADS */ - int n, itnull, nt; - - n = n1; - if (n < n2) { - n = n2; - } - n <<= 1; - if (n < n3) { - n = n3; - } - if (n > (ip[0] << 2)) { - makewt(n >> 2, ip, w); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nt = n1; - if (nt < n2) { - nt = n2; - } - nt *= 8; -#ifdef USE_FFT3D_THREADS - nt *= FFT3D_MAX_THREADS; -#endif /* USE_FFT3D_THREADS */ - if (n3 == 4) { - nt >>= 1; - } else if (n3 < 4) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft3d_alloc_error_check(t); - } -#ifdef USE_FFT3D_THREADS - if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { - xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); - cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); - } else -#endif /* USE_FFT3D_THREADS */ - { - xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); - cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -void rdft3d(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w); - void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w); - void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a); -#ifdef USE_FFT3D_THREADS - void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w); - void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w); -#endif /* USE_FFT3D_THREADS */ - int n, nw, nc, itnull, nt; - - n = n1; - if (n < n2) { - n = n2; - } - n <<= 1; - if (n < n3) { - n = n3; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n3 > (nc << 2)) { - nc = n3 >> 2; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nt = n1; - if (nt < n2) { - nt = n2; - } - nt *= 8; -#ifdef USE_FFT3D_THREADS - nt *= FFT3D_MAX_THREADS; -#endif /* USE_FFT3D_THREADS */ - if (n3 == 4) { - nt >>= 1; - } else if (n3 < 4) { - nt >>= 2; - } - t = (double *) malloc(sizeof(double) * nt); - fft3d_alloc_error_check(t); - } -#ifdef USE_FFT3D_THREADS - if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { - if (isgn < 0) { - rdft3d_sub(n1, n2, n3, isgn, a); - cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); - } - xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); - if (isgn >= 0) { - cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); - rdft3d_sub(n1, n2, n3, isgn, a); - } - } else -#endif /* USE_FFT3D_THREADS */ - { - if (isgn < 0) { - rdft3d_sub(n1, n2, n3, isgn, a); - cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); - } - xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); - if (isgn >= 0) { - cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); - rdft3d_sub(n1, n2, n3, isgn, a); - } - } - if (itnull != 0) { - free(t); - } -} - - -void rdft3dsort(int n1, int n2, int n3, int isgn, double ***a) -{ - int n1h, n2h, i, j; - double x, y; - - n1h = n1 >> 1; - n2h = n2 >> 1; - if (isgn < 0) { - for (i = 0; i < n1; i++) { - for (j = n2h + 1; j < n2; j++) { - a[i][j][0] = a[i][j][n3 + 1]; - a[i][j][1] = a[i][j][n3]; - } - } - for (i = n1h + 1; i < n1; i++) { - a[i][0][0] = a[i][0][n3 + 1]; - a[i][0][1] = a[i][0][n3]; - a[i][n2h][0] = a[i][n2h][n3 + 1]; - a[i][n2h][1] = a[i][n2h][n3]; - } - a[0][0][1] = a[0][0][n3]; - a[0][n2h][1] = a[0][n2h][n3]; - a[n1h][0][1] = a[n1h][0][n3]; - a[n1h][n2h][1] = a[n1h][n2h][n3]; - } else { - for (j = n2h + 1; j < n2; j++) { - y = a[0][j][0]; - x = a[0][j][1]; - a[0][j][n3] = x; - a[0][j][n3 + 1] = y; - a[0][n2 - j][n3] = x; - a[0][n2 - j][n3 + 1] = -y; - a[0][j][0] = a[0][n2 - j][0]; - a[0][j][1] = -a[0][n2 - j][1]; - } - for (i = 1; i < n1; i++) { - for (j = n2h + 1; j < n2; j++) { - y = a[i][j][0]; - x = a[i][j][1]; - a[i][j][n3] = x; - a[i][j][n3 + 1] = y; - a[n1 - i][n2 - j][n3] = x; - a[n1 - i][n2 - j][n3 + 1] = -y; - a[i][j][0] = a[n1 - i][n2 - j][0]; - a[i][j][1] = -a[n1 - i][n2 - j][1]; - } - } - for (i = n1h + 1; i < n1; i++) { - y = a[i][0][0]; - x = a[i][0][1]; - a[i][0][n3] = x; - a[i][0][n3 + 1] = y; - a[n1 - i][0][n3] = x; - a[n1 - i][0][n3 + 1] = -y; - a[i][0][0] = a[n1 - i][0][0]; - a[i][0][1] = -a[n1 - i][0][1]; - y = a[i][n2h][0]; - x = a[i][n2h][1]; - a[i][n2h][n3] = x; - a[i][n2h][n3 + 1] = y; - a[n1 - i][n2h][n3] = x; - a[n1 - i][n2h][n3 + 1] = -y; - a[i][n2h][0] = a[n1 - i][n2h][0]; - a[i][n2h][1] = -a[n1 - i][n2h][1]; - } - a[0][0][n3] = a[0][0][1]; - a[0][0][n3 + 1] = 0; - a[0][0][1] = 0; - a[0][n2h][n3] = a[0][n2h][1]; - a[0][n2h][n3 + 1] = 0; - a[0][n2h][1] = 0; - a[n1h][0][n3] = a[n1h][0][1]; - a[n1h][0][n3 + 1] = 0; - a[n1h][0][1] = 0; - a[n1h][n2h][n3] = a[n1h][n2h][1]; - a[n1h][n2h][n3 + 1] = 0; - a[n1h][n2h][1] = 0; - } -} - - -void ddct3d(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); - void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); -#ifdef USE_FFT3D_THREADS - void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); - void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); -#endif /* USE_FFT3D_THREADS */ - int n, nw, nc, itnull, nt; - - n = n1; - if (n < n2) { - n = n2; - } - if (n < n3) { - n = n3; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nt = n1; - if (nt < n2) { - nt = n2; - } - nt *= 4; -#ifdef USE_FFT3D_THREADS - nt *= FFT3D_MAX_THREADS; -#endif /* USE_FFT3D_THREADS */ - if (n3 == 2) { - nt >>= 1; - } - t = (double *) malloc(sizeof(double) * nt); - fft3d_alloc_error_check(t); - } -#ifdef USE_FFT3D_THREADS - if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { - ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); - ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w); - } else -#endif /* USE_FFT3D_THREADS */ - { - ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); - ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -void ddst3d(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void makewt(int nw, int *ip, double *w); - void makect(int nc, int *ip, double *c); - void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); - void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); -#ifdef USE_FFT3D_THREADS - void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); - void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w); -#endif /* USE_FFT3D_THREADS */ - int n, nw, nc, itnull, nt; - - n = n1; - if (n < n2) { - n = n2; - } - if (n < n3) { - n = n3; - } - nw = ip[0]; - if (n > (nw << 2)) { - nw = n >> 2; - makewt(nw, ip, w); - } - nc = ip[1]; - if (n > nc) { - nc = n; - makect(nc, ip, w + nw); - } - itnull = 0; - if (t == NULL) { - itnull = 1; - nt = n1; - if (nt < n2) { - nt = n2; - } - nt *= 4; -#ifdef USE_FFT3D_THREADS - nt *= FFT3D_MAX_THREADS; -#endif /* USE_FFT3D_THREADS */ - if (n3 == 2) { - nt >>= 1; - } - t = (double *) malloc(sizeof(double) * nt); - fft3d_alloc_error_check(t); - } -#ifdef USE_FFT3D_THREADS - if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { - ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); - ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w); - } else -#endif /* USE_FFT3D_THREADS */ - { - ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); - ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w); - } - if (itnull != 0) { - free(t); - } -} - - -/* -------- child routines -------- */ - - -void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - void rdft(int n, int isgn, double *a, int *ip, double *w); - int i, j, k; - - for (i = 0; i < n1; i++) { - if (icr == 0) { - for (j = 0; j < n2; j++) { - cdft(n3, isgn, a[i][j], ip, w); - } - } else if (isgn >= 0) { - for (j = 0; j < n2; j++) { - rdft(n3, isgn, a[i][j], ip, w); - } - } - if (n3 > 4) { - for (k = 0; k < n3; k += 8) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][k]; - t[2 * j + 1] = a[i][j][k + 1]; - t[2 * n2 + 2 * j] = a[i][j][k + 2]; - t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; - t[4 * n2 + 2 * j] = a[i][j][k + 4]; - t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; - t[6 * n2 + 2 * j] = a[i][j][k + 6]; - t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; - } - cdft(2 * n2, isgn, t, ip, w); - cdft(2 * n2, isgn, &t[2 * n2], ip, w); - cdft(2 * n2, isgn, &t[4 * n2], ip, w); - cdft(2 * n2, isgn, &t[6 * n2], ip, w); - for (j = 0; j < n2; j++) { - a[i][j][k] = t[2 * j]; - a[i][j][k + 1] = t[2 * j + 1]; - a[i][j][k + 2] = t[2 * n2 + 2 * j]; - a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; - a[i][j][k + 4] = t[4 * n2 + 2 * j]; - a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; - a[i][j][k + 6] = t[6 * n2 + 2 * j]; - a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; - } - } - } else if (n3 == 4) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][0]; - t[2 * j + 1] = a[i][j][1]; - t[2 * n2 + 2 * j] = a[i][j][2]; - t[2 * n2 + 2 * j + 1] = a[i][j][3]; - } - cdft(2 * n2, isgn, t, ip, w); - cdft(2 * n2, isgn, &t[2 * n2], ip, w); - for (j = 0; j < n2; j++) { - a[i][j][0] = t[2 * j]; - a[i][j][1] = t[2 * j + 1]; - a[i][j][2] = t[2 * n2 + 2 * j]; - a[i][j][3] = t[2 * n2 + 2 * j + 1]; - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][0]; - t[2 * j + 1] = a[i][j][1]; - } - cdft(2 * n2, isgn, t, ip, w); - for (j = 0; j < n2; j++) { - a[i][j][0] = t[2 * j]; - a[i][j][1] = t[2 * j + 1]; - } - } - if (icr != 0 && isgn < 0) { - for (j = 0; j < n2; j++) { - rdft(n3, isgn, a[i][j], ip, w); - } - } - } -} - - -void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - int i, j, k; - - if (n3 > 4) { - for (j = 0; j < n2; j++) { - for (k = 0; k < n3; k += 8) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][k]; - t[2 * i + 1] = a[i][j][k + 1]; - t[2 * n1 + 2 * i] = a[i][j][k + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; - t[4 * n1 + 2 * i] = a[i][j][k + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; - t[6 * n1 + 2 * i] = a[i][j][k + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j][k] = t[2 * i]; - a[i][j][k + 1] = t[2 * i + 1]; - a[i][j][k + 2] = t[2 * n1 + 2 * i]; - a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j][k + 4] = t[4 * n1 + 2 * i]; - a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j][k + 6] = t[6 * n1 + 2 * i]; - a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; - } - } - } - } else if (n3 == 4) { - for (j = 0; j < n2; j++) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][0]; - t[2 * i + 1] = a[i][j][1]; - t[2 * n1 + 2 * i] = a[i][j][2]; - t[2 * n1 + 2 * i + 1] = a[i][j][3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j][0] = t[2 * i]; - a[i][j][1] = t[2 * i + 1]; - a[i][j][2] = t[2 * n1 + 2 * i]; - a[i][j][3] = t[2 * n1 + 2 * i + 1]; - } - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][0]; - t[2 * i + 1] = a[i][j][1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][j][0] = t[2 * i]; - a[i][j][1] = t[2 * i + 1]; - } - } - } -} - - -void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a) -{ - int n1h, n2h, i, j, k, l; - double xi; - - n1h = n1 >> 1; - n2h = n2 >> 1; - if (isgn < 0) { - for (i = 1; i < n1h; i++) { - j = n1 - i; - xi = a[i][0][0] - a[j][0][0]; - a[i][0][0] += a[j][0][0]; - a[j][0][0] = xi; - xi = a[j][0][1] - a[i][0][1]; - a[i][0][1] += a[j][0][1]; - a[j][0][1] = xi; - xi = a[i][n2h][0] - a[j][n2h][0]; - a[i][n2h][0] += a[j][n2h][0]; - a[j][n2h][0] = xi; - xi = a[j][n2h][1] - a[i][n2h][1]; - a[i][n2h][1] += a[j][n2h][1]; - a[j][n2h][1] = xi; - for (k = 1; k < n2h; k++) { - l = n2 - k; - xi = a[i][k][0] - a[j][l][0]; - a[i][k][0] += a[j][l][0]; - a[j][l][0] = xi; - xi = a[j][l][1] - a[i][k][1]; - a[i][k][1] += a[j][l][1]; - a[j][l][1] = xi; - xi = a[j][k][0] - a[i][l][0]; - a[j][k][0] += a[i][l][0]; - a[i][l][0] = xi; - xi = a[i][l][1] - a[j][k][1]; - a[j][k][1] += a[i][l][1]; - a[i][l][1] = xi; - } - } - for (k = 1; k < n2h; k++) { - l = n2 - k; - xi = a[0][k][0] - a[0][l][0]; - a[0][k][0] += a[0][l][0]; - a[0][l][0] = xi; - xi = a[0][l][1] - a[0][k][1]; - a[0][k][1] += a[0][l][1]; - a[0][l][1] = xi; - xi = a[n1h][k][0] - a[n1h][l][0]; - a[n1h][k][0] += a[n1h][l][0]; - a[n1h][l][0] = xi; - xi = a[n1h][l][1] - a[n1h][k][1]; - a[n1h][k][1] += a[n1h][l][1]; - a[n1h][l][1] = xi; - } - } else { - for (i = 1; i < n1h; i++) { - j = n1 - i; - a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]); - a[i][0][0] -= a[j][0][0]; - a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]); - a[i][0][1] -= a[j][0][1]; - a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]); - a[i][n2h][0] -= a[j][n2h][0]; - a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]); - a[i][n2h][1] -= a[j][n2h][1]; - for (k = 1; k < n2h; k++) { - l = n2 - k; - a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]); - a[i][k][0] -= a[j][l][0]; - a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]); - a[i][k][1] -= a[j][l][1]; - a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]); - a[j][k][0] -= a[i][l][0]; - a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]); - a[j][k][1] -= a[i][l][1]; - } - } - for (k = 1; k < n2h; k++) { - l = n2 - k; - a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]); - a[0][k][0] -= a[0][l][0]; - a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]); - a[0][k][1] -= a[0][l][1]; - a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]); - a[n1h][k][0] -= a[n1h][l][0]; - a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]); - a[n1h][k][1] -= a[n1h][l][1]; - } - } -} - - -void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int i, j, k; - - for (i = 0; i < n1; i++) { - if (ics == 0) { - for (j = 0; j < n2; j++) { - ddct(n3, isgn, a[i][j], ip, w); - } - } else { - for (j = 0; j < n2; j++) { - ddst(n3, isgn, a[i][j], ip, w); - } - } - if (n3 > 2) { - for (k = 0; k < n3; k += 4) { - for (j = 0; j < n2; j++) { - t[j] = a[i][j][k]; - t[n2 + j] = a[i][j][k + 1]; - t[2 * n2 + j] = a[i][j][k + 2]; - t[3 * n2 + j] = a[i][j][k + 3]; - } - if (ics == 0) { - ddct(n2, isgn, t, ip, w); - ddct(n2, isgn, &t[n2], ip, w); - ddct(n2, isgn, &t[2 * n2], ip, w); - ddct(n2, isgn, &t[3 * n2], ip, w); - } else { - ddst(n2, isgn, t, ip, w); - ddst(n2, isgn, &t[n2], ip, w); - ddst(n2, isgn, &t[2 * n2], ip, w); - ddst(n2, isgn, &t[3 * n2], ip, w); - } - for (j = 0; j < n2; j++) { - a[i][j][k] = t[j]; - a[i][j][k + 1] = t[n2 + j]; - a[i][j][k + 2] = t[2 * n2 + j]; - a[i][j][k + 3] = t[3 * n2 + j]; - } - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - t[j] = a[i][j][0]; - t[n2 + j] = a[i][j][1]; - } - if (ics == 0) { - ddct(n2, isgn, t, ip, w); - ddct(n2, isgn, &t[n2], ip, w); - } else { - ddst(n2, isgn, t, ip, w); - ddst(n2, isgn, &t[n2], ip, w); - } - for (j = 0; j < n2; j++) { - a[i][j][0] = t[j]; - a[i][j][1] = t[n2 + j]; - } - } - } -} - - -void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int i, j, k; - - if (n3 > 2) { - for (j = 0; j < n2; j++) { - for (k = 0; k < n3; k += 4) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j][k]; - t[n1 + i] = a[i][j][k + 1]; - t[2 * n1 + i] = a[i][j][k + 2]; - t[3 * n1 + i] = a[i][j][k + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j][k] = t[i]; - a[i][j][k + 1] = t[n1 + i]; - a[i][j][k + 2] = t[2 * n1 + i]; - a[i][j][k + 3] = t[3 * n1 + i]; - } - } - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j][0]; - t[n1 + i] = a[i][j][1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j][0] = t[i]; - a[i][j][1] = t[n1 + i]; - } - } - } -} - - -#ifdef USE_FFT3D_THREADS -struct fft3d_arg_st { - int nthread; - int n0; - int n1; - int n2; - int n3; - int ic; - int isgn; - double ***a; - double *t; - int *ip; - double *w; -}; -typedef struct fft3d_arg_st fft3d_arg_t; - - -void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void *xdft3da_th(void *p); - fft3d_thread_t th[FFT3D_MAX_THREADS]; - fft3d_arg_t ag[FFT3D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT3D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - nt = 8 * n2; - if (n3 == 4) { - nt >>= 1; - } else if (n3 < 4) { - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].n3 = n3; - ag[i].ic = icr; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft3d_thread_create(&th[i], xdft3da_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft3d_thread_wait(th[i]); - } -} - - -void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, - double *t, int *ip, double *w) -{ - void *cdft3db_th(void *p); - fft3d_thread_t th[FFT3D_MAX_THREADS]; - fft3d_arg_t ag[FFT3D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT3D_MAX_THREADS; - if (nthread > n2) { - nthread = n2; - } - nt = 8 * n1; - if (n3 == 4) { - nt >>= 1; - } else if (n3 < 4) { - nt >>= 2; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].n3 = n3; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft3d_thread_create(&th[i], cdft3db_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft3d_thread_wait(th[i]); - } -} - - -void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void *ddxt3da_th(void *p); - fft3d_thread_t th[FFT3D_MAX_THREADS]; - fft3d_arg_t ag[FFT3D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT3D_MAX_THREADS; - if (nthread > n1) { - nthread = n1; - } - nt = 4 * n2; - if (n3 == 2) { - nt >>= 1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].n3 = n3; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft3d_thread_wait(th[i]); - } -} - - -void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, - double ***a, double *t, int *ip, double *w) -{ - void *ddxt3db_th(void *p); - fft3d_thread_t th[FFT3D_MAX_THREADS]; - fft3d_arg_t ag[FFT3D_MAX_THREADS]; - int nthread, nt, i; - - nthread = FFT3D_MAX_THREADS; - if (nthread > n2) { - nthread = n2; - } - nt = 4 * n1; - if (n3 == 2) { - nt >>= 1; - } - for (i = 0; i < nthread; i++) { - ag[i].nthread = nthread; - ag[i].n0 = i; - ag[i].n1 = n1; - ag[i].n2 = n2; - ag[i].n3 = n3; - ag[i].ic = ics; - ag[i].isgn = isgn; - ag[i].a = a; - ag[i].t = &t[nt * i]; - ag[i].ip = ip; - ag[i].w = w; - fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]); - } - for (i = 0; i < nthread; i++) { - fft3d_thread_wait(th[i]); - } -} - - -void *xdft3da_th(void *p) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - void rdft(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k; - double ***a, *t, *w; - - nthread = ((fft3d_arg_t *) p)->nthread; - n0 = ((fft3d_arg_t *) p)->n0; - n1 = ((fft3d_arg_t *) p)->n1; - n2 = ((fft3d_arg_t *) p)->n2; - n3 = ((fft3d_arg_t *) p)->n3; - icr = ((fft3d_arg_t *) p)->ic; - isgn = ((fft3d_arg_t *) p)->isgn; - a = ((fft3d_arg_t *) p)->a; - t = ((fft3d_arg_t *) p)->t; - ip = ((fft3d_arg_t *) p)->ip; - w = ((fft3d_arg_t *) p)->w; - for (i = n0; i < n1; i += nthread) { - if (icr == 0) { - for (j = 0; j < n2; j++) { - cdft(n3, isgn, a[i][j], ip, w); - } - } else if (isgn >= 0) { - for (j = 0; j < n2; j++) { - rdft(n3, isgn, a[i][j], ip, w); - } - } - if (n3 > 4) { - for (k = 0; k < n3; k += 8) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][k]; - t[2 * j + 1] = a[i][j][k + 1]; - t[2 * n2 + 2 * j] = a[i][j][k + 2]; - t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; - t[4 * n2 + 2 * j] = a[i][j][k + 4]; - t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; - t[6 * n2 + 2 * j] = a[i][j][k + 6]; - t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; - } - cdft(2 * n2, isgn, t, ip, w); - cdft(2 * n2, isgn, &t[2 * n2], ip, w); - cdft(2 * n2, isgn, &t[4 * n2], ip, w); - cdft(2 * n2, isgn, &t[6 * n2], ip, w); - for (j = 0; j < n2; j++) { - a[i][j][k] = t[2 * j]; - a[i][j][k + 1] = t[2 * j + 1]; - a[i][j][k + 2] = t[2 * n2 + 2 * j]; - a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; - a[i][j][k + 4] = t[4 * n2 + 2 * j]; - a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; - a[i][j][k + 6] = t[6 * n2 + 2 * j]; - a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; - } - } - } else if (n3 == 4) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][0]; - t[2 * j + 1] = a[i][j][1]; - t[2 * n2 + 2 * j] = a[i][j][2]; - t[2 * n2 + 2 * j + 1] = a[i][j][3]; - } - cdft(2 * n2, isgn, t, ip, w); - cdft(2 * n2, isgn, &t[2 * n2], ip, w); - for (j = 0; j < n2; j++) { - a[i][j][0] = t[2 * j]; - a[i][j][1] = t[2 * j + 1]; - a[i][j][2] = t[2 * n2 + 2 * j]; - a[i][j][3] = t[2 * n2 + 2 * j + 1]; - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - t[2 * j] = a[i][j][0]; - t[2 * j + 1] = a[i][j][1]; - } - cdft(2 * n2, isgn, t, ip, w); - for (j = 0; j < n2; j++) { - a[i][j][0] = t[2 * j]; - a[i][j][1] = t[2 * j + 1]; - } - } - if (icr != 0 && isgn < 0) { - for (j = 0; j < n2; j++) { - rdft(n3, isgn, a[i][j], ip, w); - } - } - } - return (void *) 0; -} - - -void *cdft3db_th(void *p) -{ - void cdft(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k; - double ***a, *t, *w; - - nthread = ((fft3d_arg_t *) p)->nthread; - n0 = ((fft3d_arg_t *) p)->n0; - n1 = ((fft3d_arg_t *) p)->n1; - n2 = ((fft3d_arg_t *) p)->n2; - n3 = ((fft3d_arg_t *) p)->n3; - isgn = ((fft3d_arg_t *) p)->isgn; - a = ((fft3d_arg_t *) p)->a; - t = ((fft3d_arg_t *) p)->t; - ip = ((fft3d_arg_t *) p)->ip; - w = ((fft3d_arg_t *) p)->w; - if (n3 > 4) { - for (j = n0; j < n2; j += nthread) { - for (k = 0; k < n3; k += 8) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][k]; - t[2 * i + 1] = a[i][j][k + 1]; - t[2 * n1 + 2 * i] = a[i][j][k + 2]; - t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; - t[4 * n1 + 2 * i] = a[i][j][k + 4]; - t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; - t[6 * n1 + 2 * i] = a[i][j][k + 6]; - t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - cdft(2 * n1, isgn, &t[4 * n1], ip, w); - cdft(2 * n1, isgn, &t[6 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j][k] = t[2 * i]; - a[i][j][k + 1] = t[2 * i + 1]; - a[i][j][k + 2] = t[2 * n1 + 2 * i]; - a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; - a[i][j][k + 4] = t[4 * n1 + 2 * i]; - a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; - a[i][j][k + 6] = t[6 * n1 + 2 * i]; - a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; - } - } - } - } else if (n3 == 4) { - for (j = n0; j < n2; j += nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][0]; - t[2 * i + 1] = a[i][j][1]; - t[2 * n1 + 2 * i] = a[i][j][2]; - t[2 * n1 + 2 * i + 1] = a[i][j][3]; - } - cdft(2 * n1, isgn, t, ip, w); - cdft(2 * n1, isgn, &t[2 * n1], ip, w); - for (i = 0; i < n1; i++) { - a[i][j][0] = t[2 * i]; - a[i][j][1] = t[2 * i + 1]; - a[i][j][2] = t[2 * n1 + 2 * i]; - a[i][j][3] = t[2 * n1 + 2 * i + 1]; - } - } - } else if (n3 == 2) { - for (j = n0; j < n2; j += nthread) { - for (i = 0; i < n1; i++) { - t[2 * i] = a[i][j][0]; - t[2 * i + 1] = a[i][j][1]; - } - cdft(2 * n1, isgn, t, ip, w); - for (i = 0; i < n1; i++) { - a[i][j][0] = t[2 * i]; - a[i][j][1] = t[2 * i + 1]; - } - } - } - return (void *) 0; -} - - -void *ddxt3da_th(void *p) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; - double ***a, *t, *w; - - nthread = ((fft3d_arg_t *) p)->nthread; - n0 = ((fft3d_arg_t *) p)->n0; - n1 = ((fft3d_arg_t *) p)->n1; - n2 = ((fft3d_arg_t *) p)->n2; - n3 = ((fft3d_arg_t *) p)->n3; - ics = ((fft3d_arg_t *) p)->ic; - isgn = ((fft3d_arg_t *) p)->isgn; - a = ((fft3d_arg_t *) p)->a; - t = ((fft3d_arg_t *) p)->t; - ip = ((fft3d_arg_t *) p)->ip; - w = ((fft3d_arg_t *) p)->w; - for (i = n0; i < n1; i += nthread) { - if (ics == 0) { - for (j = 0; j < n2; j++) { - ddct(n3, isgn, a[i][j], ip, w); - } - } else { - for (j = 0; j < n2; j++) { - ddst(n3, isgn, a[i][j], ip, w); - } - } - if (n3 > 2) { - for (k = 0; k < n3; k += 4) { - for (j = 0; j < n2; j++) { - t[j] = a[i][j][k]; - t[n2 + j] = a[i][j][k + 1]; - t[2 * n2 + j] = a[i][j][k + 2]; - t[3 * n2 + j] = a[i][j][k + 3]; - } - if (ics == 0) { - ddct(n2, isgn, t, ip, w); - ddct(n2, isgn, &t[n2], ip, w); - ddct(n2, isgn, &t[2 * n2], ip, w); - ddct(n2, isgn, &t[3 * n2], ip, w); - } else { - ddst(n2, isgn, t, ip, w); - ddst(n2, isgn, &t[n2], ip, w); - ddst(n2, isgn, &t[2 * n2], ip, w); - ddst(n2, isgn, &t[3 * n2], ip, w); - } - for (j = 0; j < n2; j++) { - a[i][j][k] = t[j]; - a[i][j][k + 1] = t[n2 + j]; - a[i][j][k + 2] = t[2 * n2 + j]; - a[i][j][k + 3] = t[3 * n2 + j]; - } - } - } else if (n3 == 2) { - for (j = 0; j < n2; j++) { - t[j] = a[i][j][0]; - t[n2 + j] = a[i][j][1]; - } - if (ics == 0) { - ddct(n2, isgn, t, ip, w); - ddct(n2, isgn, &t[n2], ip, w); - } else { - ddst(n2, isgn, t, ip, w); - ddst(n2, isgn, &t[n2], ip, w); - } - for (j = 0; j < n2; j++) { - a[i][j][0] = t[j]; - a[i][j][1] = t[n2 + j]; - } - } - } - return (void *) 0; -} - - -void *ddxt3db_th(void *p) -{ - void ddct(int n, int isgn, double *a, int *ip, double *w); - void ddst(int n, int isgn, double *a, int *ip, double *w); - int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; - double ***a, *t, *w; - - nthread = ((fft3d_arg_t *) p)->nthread; - n0 = ((fft3d_arg_t *) p)->n0; - n1 = ((fft3d_arg_t *) p)->n1; - n2 = ((fft3d_arg_t *) p)->n2; - n3 = ((fft3d_arg_t *) p)->n3; - ics = ((fft3d_arg_t *) p)->ic; - isgn = ((fft3d_arg_t *) p)->isgn; - a = ((fft3d_arg_t *) p)->a; - t = ((fft3d_arg_t *) p)->t; - ip = ((fft3d_arg_t *) p)->ip; - w = ((fft3d_arg_t *) p)->w; - if (n3 > 2) { - for (j = n0; j < n2; j += nthread) { - for (k = 0; k < n3; k += 4) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j][k]; - t[n1 + i] = a[i][j][k + 1]; - t[2 * n1 + i] = a[i][j][k + 2]; - t[3 * n1 + i] = a[i][j][k + 3]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - ddct(n1, isgn, &t[2 * n1], ip, w); - ddct(n1, isgn, &t[3 * n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - ddst(n1, isgn, &t[2 * n1], ip, w); - ddst(n1, isgn, &t[3 * n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j][k] = t[i]; - a[i][j][k + 1] = t[n1 + i]; - a[i][j][k + 2] = t[2 * n1 + i]; - a[i][j][k + 3] = t[3 * n1 + i]; - } - } - } - } else if (n3 == 2) { - for (j = n0; j < n2; j += nthread) { - for (i = 0; i < n1; i++) { - t[i] = a[i][j][0]; - t[n1 + i] = a[i][j][1]; - } - if (ics == 0) { - ddct(n1, isgn, t, ip, w); - ddct(n1, isgn, &t[n1], ip, w); - } else { - ddst(n1, isgn, t, ip, w); - ddst(n1, isgn, &t[n1], ip, w); - } - for (i = 0; i < n1; i++) { - a[i][j][0] = t[i]; - a[i][j][1] = t[n1 + i]; - } - } - } - return (void *) 0; -} -#endif /* USE_FFT3D_THREADS */ -
diff --git a/third_party/tensorflow_dependencies/fft2d/fftsg3d.f b/third_party/tensorflow_dependencies/fft2d/fftsg3d.f deleted file mode 100644 index f08b1dc..0000000 --- a/third_party/tensorflow_dependencies/fft2d/fftsg3d.f +++ /dev/null
@@ -1,926 +0,0 @@ -! Fast Fourier/Cosine/Sine Transform -! dimension :three -! data length :power of 2 -! decimation :frequency -! radix :split-radix, row-column -! data :inplace -! table :use -! subroutines -! cdft3d: Complex Discrete Fourier Transform -! rdft3d: Real Discrete Fourier Transform -! ddct3d: Discrete Cosine Transform -! ddst3d: Discrete Sine Transform -! necessary package -! fftsg.f : 1D-FFT package -! -! -! -------- Complex DFT (Discrete Fourier Transform) -------- -! [definition] -! <case1> -! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! x(j1,j2,j3) * -! exp(2*pi*i*j1*k1/n1) * -! exp(2*pi*i*j2*k2/n2) * -! exp(2*pi*i*j3*k3/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! <case2> -! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! x(j1,j2,j3) * -! exp(-2*pi*i*j1*k1/n1) * -! exp(-2*pi*i*j2*k2/n2) * -! exp(-2*pi*i*j3*k3/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! (notes: sum_j=0^n-1 is a summation from j=0 to n-1) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) -! [parameters] -! n1max :row1 size of the 3D array (integer) -! n2max :row2 size of the 3D array (integer) -! 2*n1 :data length (integer) -! n1 >= 1, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 1, n2 = power of 2 -! n3 :data length (integer) -! n3 >= 1, n3 = power of 2 -! a(0:2*n1-1,0:n2-1,0:n3-1) -! :input/output data (real*8) -! input data -! a(2*j1,j2,j3) = Re(x(j1,j2,j3)), -! a(2*j1+1,j2,j3) = Im(x(j1,j2,j3)), -! 0<=j1<n1, 0<=j2<n2, 0<=j3<n3 -! output data -! a(2*k1,k2,k3) = Re(X(k1,k2,k3)), -! a(2*k1+1,k2,k3) = Im(X(k1,k2,k3)), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! t(0:*) :work area (real*8) -! length of t >= max(8*n2, 8*n3) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1, n2, n3)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/2, n2/2, n3/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) -! is -! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! do j1 = 0, 2 * n1 - 1 -! a(j1,j2,j3) = a(j1,j2,j3) * (1.0d0/n1/n2/n3) -! end do -! end do -! end do -! . -! -! -! -------- Real DFT / Inverse of Real DFT -------- -! [definition] -! <case1> RDFT -! R(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! a(j1,j2,j3) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + -! 2*pi*j3*k3/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! I(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! a(j1,j2,j3) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + -! 2*pi*j3*k3/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! <case2> IRDFT (excluding scale) -! a(k1,k2,k3) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! (R(j1,j2,j3) * -! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + -! 2*pi*j3*k3/n3) + -! I(j1,j2,j3) * -! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + -! 2*pi*j3*k3/n3)), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! (notes: R(mod(n1-k1,n1),mod(n2-k2,n2),mod(n3-k3,n3)) = R(k1,k2,k3), -! I(mod(n1-k1,n1),mod(n2-k2,n2),mod(n3-k3,n3)) = -I(k1,k2,k3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3) -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! [parameters] -! n1max :row1 size of the 3D array (integer) -! n2max :row2 size of the 3D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! n3 :data length (integer) -! n3 >= 2, n3 = power of 2 -! a(0:n1-1,0:n2-1,0:n3-1) -! :input/output data (real*8) -! <case1> -! output data -! a(2*k1,k2,k3) = R(k1,k2,k3) -! = R(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), -! a(2*k1+1,k2,k3) = I(k1,k2,k3) -! = -I(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), -! 0<k1<n1/2, 0<=k2<n2, 0<=k3<n3, -! a(0,k2,k3) = R(0,k2,k3) -! = R(0,n2-k2,mod(n3-k3,n3)), -! a(1,k2,k3) = I(0,k2,k3) -! = -I(0,n2-k2,mod(n3-k3,n3)), -! a(1,n2-k2,k3) = R(n1/2,k2,mod(n3-k3,n3)) -! = R(n1/2,n2-k2,k3), -! a(0,n2-k2,k3) = -I(n1/2,k2,mod(n3-k3,n3)) -! = I(n1/2,n2-k2,k3), -! 0<k2<n2/2, 0<=k3<n3, -! a(0,0,k3) = R(0,0,k3) -! = R(0,0,n3-k3), -! a(1,0,k3) = I(0,0,k3) -! = -I(0,0,n3-k3), -! a(0,n2/2,k3) = R(0,n2/2,k3) -! = R(0,n2/2,n3-k3), -! a(1,n2/2,k3) = I(0,n2/2,k3) -! = -I(0,n2/2,n3-k3), -! a(1,0,n3-k3) = R(n1/2,0,k3) -! = R(n1/2,0,n3-k3), -! a(0,0,n3-k3) = -I(n1/2,0,k3) -! = I(n1/2,0,n3-k3), -! a(1,n2/2,n3-k3) = R(n1/2,n2/2,k3) -! = R(n1/2,n2/2,n3-k3), -! a(0,n2/2,n3-k3) = -I(n1/2,n2/2,k3) -! = I(n1/2,n2/2,n3-k3), -! 0<k3<n3/2, -! a(0,0,0) = R(0,0,0), -! a(1,0,0) = R(n1/2,0,0), -! a(0,0,n3/2) = R(0,0,n3/2), -! a(1,0,n3/2) = R(n1/2,0,n3/2), -! a(0,n2/2,0) = R(0,n2/2,0), -! a(1,n2/2,0) = R(n1/2,n2/2,0), -! a(0,n2/2,n3/2) = R(0,n2/2,n3/2), -! a(1,n2/2,n3/2) = R(n1/2,n2/2,n3/2) -! <case2> -! input data -! a(2*j1,j2,j3) = R(j1,j2,j3) -! = R(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), -! a(2*j1+1,j2,j3) = I(j1,j2,j3) -! = -I(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), -! 0<j1<n1/2, 0<=j2<n2, 0<=j3<n3, -! a(0,j2,j3) = R(0,j2,j3) -! = R(0,n2-j2,mod(n3-j3,n3)), -! a(1,j2,j3) = I(0,j2,j3) -! = -I(0,n2-j2,mod(n3-j3,n3)), -! a(1,n2-j2,j3) = R(n1/2,j2,mod(n3-j3,n3)) -! = R(n1/2,n2-j2,j3), -! a(0,n2-j2,j3) = -I(n1/2,j2,mod(n3-j3,n3)) -! = I(n1/2,n2-j2,j3), -! 0<j2<n2/2, 0<=j3<n3, -! a(0,0,j3) = R(0,0,j3) -! = R(0,0,n3-j3), -! a(1,0,j3) = I(0,0,j3) -! = -I(0,0,n3-j3), -! a(0,n2/2,j3) = R(0,n2/2,j3) -! = R(0,n2/2,n3-j3), -! a(1,n2/2,j3) = I(0,n2/2,j3) -! = -I(0,n2/2,n3-j3), -! a(1,0,n3-j3) = R(n1/2,0,j3) -! = R(n1/2,0,n3-j3), -! a(0,0,n3-j3) = -I(n1/2,0,j3) -! = I(n1/2,0,n3-j3), -! a(1,n2/2,n3-j3) = R(n1/2,n2/2,j3) -! = R(n1/2,n2/2,n3-j3), -! a(0,n2/2,n3-j3) = -I(n1/2,n2/2,j3) -! = I(n1/2,n2/2,n3-j3), -! 0<j3<n3/2, -! a(0,0,0) = R(0,0,0), -! a(1,0,0) = R(n1/2,0,0), -! a(0,0,n3/2) = R(0,0,n3/2), -! a(1,0,n3/2) = R(n1/2,0,n3/2), -! a(0,n2/2,0) = R(0,n2/2,0), -! a(1,n2/2,0) = R(n1/2,n2/2,0), -! a(0,n2/2,n3/2) = R(0,n2/2,n3/2), -! a(1,n2/2,n3/2) = R(n1/2,n2/2,n3/2) -! ---- output ordering ---- -! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! call rdft3dsort(n1max, n2max, n1, n2, n3, 1, a) -! ! stored data is a(0:n1-1,0:n2-1,0:n3+1): -! ! a(2*k1,k2,k3) = R(k1,k2,k3), -! ! a(2*k1+1,k2,k3) = I(k1,k2,k3), -! ! 0<=k1<=n1/2, 0<=k2<n2, 0<=k3<n3. -! ! the stored data is larger than the input data! -! ---- input ordering ---- -! call rdft3dsort(n1max, n2max, n1, n2, n3, -1, a) -! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! t(0:*) :work area (real*8) -! length of t >= max(8*n2, 8*n3) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2, n3)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1/4, n2/2, n3/2) + n1/4 -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! is -! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1,j2,j3) = a(j1,j2,j3) * (2.0d0/n1/n2/n3) -! end do -! end do -! end do -! . -! -! -! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- -! [definition] -! <case1> IDCT (excluding scale) -! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! a(j1,j2,j3) * -! cos(pi*j1*(k1+1/2)/n1) * -! cos(pi*j2*(k2+1/2)/n2) * -! cos(pi*j3*(k3+1/2)/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! <case2> DCT -! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! a(j1,j2,j3) * -! cos(pi*(j1+1/2)*k1/n1) * -! cos(pi*(j2+1/2)*k2/n2) * -! cos(pi*(j3+1/2)*k3/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! [parameters] -! n1max :row1 size of the 3D array (integer) -! n2max :row2 size of the 3D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! n3 :data length (integer) -! n3 >= 2, n3 = power of 2 -! a(0:n1-1,0:n2-1,0:n3-1) -! :input/output data (real*8) -! output data -! a(k1,k2,k3) = C(k1,k2,k3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! t(0:*) :work area (real*8) -! length of t >= max(4*n2, 4*n3) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2/2, n3/2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1*3/2, n2*3/2, n3*3/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! is -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 -! end do -! do j1 = 0, n1 - 1 -! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 -! end do -! end do -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 -! end do -! end do -! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) -! end do -! end do -! end do -! . -! -! -! -------- DST (Discrete Sine Transform) / Inverse of DST -------- -! [definition] -! <case1> IDST (excluding scale) -! S(k1,k2,k3) = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 -! A(j1,j2,j3) * -! sin(pi*j1*(k1+1/2)/n1) * -! sin(pi*j2*(k2+1/2)/n2) * -! sin(pi*j3*(k3+1/2)/n3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! <case2> DST -! S(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 -! a(j1,j2,j3) * -! sin(pi*(j1+1/2)*k1/n1) * -! sin(pi*(j2+1/2)*k2/n2) * -! sin(pi*(j3+1/2)*k3/n3), -! 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 -! [usage] -! <case1> -! ip(0) = 0 ! first time only -! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! <case2> -! ip(0) = 0 ! first time only -! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! [parameters] -! n1max :row1 size of the 3D array (integer) -! n2max :row2 size of the 3D array (integer) -! n1 :data length (integer) -! n1 >= 2, n1 = power of 2 -! n2 :data length (integer) -! n2 >= 2, n2 = power of 2 -! n3 :data length (integer) -! n3 >= 2, n3 = power of 2 -! a(0:n1-1,0:n2-1,0:n3-1) -! :input/output data (real*8) -! <case1> -! input data -! a(mod(j1,n1),mod(j2,n2),mod(j3,n3)) = A(j1,j2,j3), -! 0<j1<=n1, 0<j2<=n2, 0<j3<=n3 -! output data -! a(k1,k2,k3) = S(k1,k2,k3), -! 0<=k1<n1, 0<=k2<n2, 0<=k3<n3 -! <case2> -! output data -! a(mod(k1,n1),mod(k2,n2),mod(k3,n3)) = S(k1,k2,k3), -! 0<k1<=n1, 0<k2<=n2, 0<k3<=n3 -! t(0:*) :work area (real*8) -! length of t >= max(4*n2, 4*n3) -! ip(0:*):work area for bit reversal (integer) -! length of ip >= 2+sqrt(n) -! (n = max(n1/2, n2/2, n3/2)) -! ip(0),ip(1) are pointers of the cos/sin table. -! w(0:*) :cos/sin table (real*8) -! length of w >= max(n1*3/2, n2*3/2, n3*3/2) -! w(),ip() are initialized if ip(0) = 0. -! [remark] -! Inverse of -! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) -! is -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 -! end do -! do j1 = 0, n1 - 1 -! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 -! end do -! end do -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 -! end do -! end do -! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) -! do j3 = 0, n3 - 1 -! do j2 = 0, n2 - 1 -! do j1 = 0, n1 - 1 -! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) -! end do -! end do -! end do -! . -! -! - subroutine cdft3d(n1max, n2max, n1, n2, n3, isgn, a, - & t, ip, w) - integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), n - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - n = 2 * max(n2, n3) - n = max(n, n1) - if (n .gt. 4 * ip(0)) then - call makewt(n / 4, ip, w) - end if - call xdft3da_sub(n1max, n2max, n1, n2, n3, 0, - & isgn, a, t, ip, w) - call cdft3db_sub(n1max, n2max, n1, n2, n3, - & isgn, a, t, ip, w) - end -! - subroutine rdft3d(n1max, n2max, n1, n2, n3, isgn, a, - & t, ip, w) - integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), - & n, nw, nc - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - n = 2 * max(n2, n3) - n = max(n, n1) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n1 .gt. 4 * nc) then - nc = n1 / 4 - call makect(nc, ip, w(nw)) - end if - if (isgn .lt. 0) then - call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) - call cdft3db_sub(n1max, n2max, n1, n2, n3, - & isgn, a, t, ip, w) - call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, - & isgn, a, t, ip, w) - else - call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, - & isgn, a, t, ip, w) - call cdft3db_sub(n1max, n2max, n1, n2, n3, - & isgn, a, t, ip, w) - call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) - end if - end -! - subroutine rdft3dsort(n1max, n2max, n1, n2, n3, isgn, a) - integer n1max, n2max, n1, n2, n3, isgn, n2h, n3h, j, k - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), x, y - n2h = n2 / 2 - n3h = n3 / 2 - if (isgn .lt. 0) then - do k = 0, n3 - 1 - do j = n2h + 1, n2 - 1 - a(0, j, k) = a(n1 + 1, j, k) - a(1, j, k) = a(n1, j, k) - end do - end do - do k = n3h + 1, n3 - 1 - a(0, 0, k) = a(n1 + 1, 0, k) - a(1, 0, k) = a(n1, 0, k) - a(0, n2h, k) = a(n1 + 1, n2h, k) - a(1, n2h, k) = a(n1, n2h, k) - end do - a(1, 0, 0) = a(n1, 0, 0) - a(1, n2h, 0) = a(n1, n2h, 0) - a(1, 0, n3h) = a(n1, 0, n3h) - a(1, n2h, n3h) = a(n1, n2h, n3h) - else - do j = n2h + 1, n2 - 1 - y = a(0, j, 0) - x = a(1, j, 0) - a(n1, j, 0) = x - a(n1 + 1, j, 0) = y - a(n1, n2 - j, 0) = x - a(n1 + 1, n2 - j, 0) = -y - a(0, j, 0) = a(0, n2 - j, 0) - a(1, j, 0) = -a(1, n2 - j, 0) - end do - do k = 1, n3 - 1 - do j = n2h + 1, n2 - 1 - y = a(0, j, k) - x = a(1, j, k) - a(n1, j, k) = x - a(n1 + 1, j, k) = y - a(n1, n2 - j, n3 - k) = x - a(n1 + 1, n2 - j, n3 - k) = -y - a(0, j, k) = a(0, n2 - j, n3 - k) - a(1, j, k) = -a(1, n2 - j, n3 - k) - end do - end do - do k = n3h + 1, n3 - 1 - y = a(0, 0, k) - x = a(1, 0, k) - a(n1, 0, k) = x - a(n1 + 1, 0, k) = y - a(n1, 0, n3 - k) = x - a(n1 + 1, 0, n3 - k) = -y - a(0, 0, k) = a(0, 0, n3 - k) - a(1, 0, k) = -a(1, 0, n3 - k) - y = a(0, n2h, k) - x = a(1, n2h, k) - a(n1, n2h, k) = x - a(n1 + 1, n2h, k) = y - a(n1, n2h, n3 - k) = x - a(n1 + 1, n2h, n3 - k) = -y - a(0, n2h, k) = a(0, n2h, n3 - k) - a(1, n2h, k) = -a(1, n2h, n3 - k) - end do - a(n1, 0, 0) = a(1, 0, 0) - a(n1 + 1, 0, 0) = 0 - a(1, 0, 0) = 0 - a(n1, n2h, 0) = a(1, n2h, 0) - a(n1 + 1, n2h, 0) = 0 - a(1, n2h, 0) = 0 - a(n1, 0, n3h) = a(1, 0, n3h) - a(n1 + 1, 0, n3h) = 0 - a(1, 0, n3h) = 0 - a(n1, n2h, n3h) = a(1, n2h, n3h) - a(n1 + 1, n2h, n3h) = 0 - a(1, n2h, n3h) = 0 - end if - end -! - subroutine ddct3d(n1max, n2max, n1, n2, n3, isgn, a, - & t, ip, w) - integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), - & n, nw, nc - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - n = max(n2, n3) - n = max(n, n1) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - call ddxt3da_sub(n1max, n2max, n1, n2, n3, 0, - & isgn, a, t, ip, w) - call ddxt3db_sub(n1max, n2max, n1, n2, n3, 0, - & isgn, a, t, ip, w) - end -! - subroutine ddst3d(n1max, n2max, n1, n2, n3, isgn, a, - & t, ip, w) - integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), - & n, nw, nc - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - n = max(n2, n3) - n = max(n, n1) - nw = ip(0) - if (n .gt. 4 * nw) then - nw = n / 4 - call makewt(nw, ip, w) - end if - nc = ip(1) - if (n .gt. nc) then - nc = n - call makect(nc, ip, w(nw)) - end if - call ddxt3da_sub(n1max, n2max, n1, n2, n3, 1, - & isgn, a, t, ip, w) - call ddxt3db_sub(n1max, n2max, n1, n2, n3, 1, - & isgn, a, t, ip, w) - end -! -! -------- child routines -------- -! - subroutine xdft3da_sub(n1max, n2max, n1, n2, n3, icr, - & isgn, a, t, ip, w) - integer n1max, n2max, n1, n2, n3, icr, isgn, - & ip(0 : *), i, j, k - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - do k = 0, n3 - 1 - if (icr .eq. 0) then - do j = 0, n2 - 1 - call cdft(n1, isgn, a(0, j, k), ip, w) - end do - else if (isgn .ge. 0) then - do j = 0, n2 - 1 - call rdft(n1, isgn, a(0, j, k), ip, w) - end do - end if - if (n1 .gt. 4) then - do i = 0, n1 - 8, 8 - do j = 0, n2 - 1 - t(2 * j) = a(i, j, k) - t(2 * j + 1) = a(i + 1, j, k) - t(2 * n2 + 2 * j) = a(i + 2, j, k) - t(2 * n2 + 2 * j + 1) = a(i + 3, j, k) - t(4 * n2 + 2 * j) = a(i + 4, j, k) - t(4 * n2 + 2 * j + 1) = a(i + 5, j, k) - t(6 * n2 + 2 * j) = a(i + 6, j, k) - t(6 * n2 + 2 * j + 1) = a(i + 7, j, k) - end do - call cdft(2 * n2, isgn, t, ip, w) - call cdft(2 * n2, isgn, t(2 * n2), ip, w) - call cdft(2 * n2, isgn, t(4 * n2), ip, w) - call cdft(2 * n2, isgn, t(6 * n2), ip, w) - do j = 0, n2 - 1 - a(i, j, k) = t(2 * j) - a(i + 1, j, k) = t(2 * j + 1) - a(i + 2, j, k) = t(2 * n2 + 2 * j) - a(i + 3, j, k) = t(2 * n2 + 2 * j + 1) - a(i + 4, j, k) = t(4 * n2 + 2 * j) - a(i + 5, j, k) = t(4 * n2 + 2 * j + 1) - a(i + 6, j, k) = t(6 * n2 + 2 * j) - a(i + 7, j, k) = t(6 * n2 + 2 * j + 1) - end do - end do - else if (n1 .eq. 4) then - do j = 0, n2 - 1 - t(2 * j) = a(0, j, k) - t(2 * j + 1) = a(1, j, k) - t(2 * n2 + 2 * j) = a(2, j, k) - t(2 * n2 + 2 * j + 1) = a(3, j, k) - end do - call cdft(2 * n2, isgn, t, ip, w) - call cdft(2 * n2, isgn, t(2 * n2), ip, w) - do j = 0, n2 - 1 - a(0, j, k) = t(2 * j) - a(1, j, k) = t(2 * j + 1) - a(2, j, k) = t(2 * n2 + 2 * j) - a(3, j, k) = t(2 * n2 + 2 * j + 1) - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - t(2 * j) = a(0, j, k) - t(2 * j + 1) = a(1, j, k) - end do - call cdft(2 * n2, isgn, t, ip, w) - do j = 0, n2 - 1 - a(0, j, k) = t(2 * j) - a(1, j, k) = t(2 * j + 1) - end do - end if - if (icr .ne. 0 .and. isgn .lt. 0) then - do j = 0, n2 - 1 - call rdft(n1, isgn, a(0, j, k), ip, w) - end do - end if - end do - end -! - subroutine cdft3db_sub(n1max, n2max, n1, n2, n3, - & isgn, a, t, ip, w) - integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), - & i, j, k - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - if (n1 .gt. 4) then - do j = 0, n2 - 1 - do i = 0, n1 - 8, 8 - do k = 0, n3 - 1 - t(2 * k) = a(i, j, k) - t(2 * k + 1) = a(i + 1, j, k) - t(2 * n3 + 2 * k) = a(i + 2, j, k) - t(2 * n3 + 2 * k + 1) = a(i + 3, j, k) - t(4 * n3 + 2 * k) = a(i + 4, j, k) - t(4 * n3 + 2 * k + 1) = a(i + 5, j, k) - t(6 * n3 + 2 * k) = a(i + 6, j, k) - t(6 * n3 + 2 * k + 1) = a(i + 7, j, k) - end do - call cdft(2 * n3, isgn, t, ip, w) - call cdft(2 * n3, isgn, t(2 * n3), ip, w) - call cdft(2 * n3, isgn, t(4 * n3), ip, w) - call cdft(2 * n3, isgn, t(6 * n3), ip, w) - do k = 0, n3 - 1 - a(i, j, k) = t(2 * k) - a(i + 1, j, k) = t(2 * k + 1) - a(i + 2, j, k) = t(2 * n3 + 2 * k) - a(i + 3, j, k) = t(2 * n3 + 2 * k + 1) - a(i + 4, j, k) = t(4 * n3 + 2 * k) - a(i + 5, j, k) = t(4 * n3 + 2 * k + 1) - a(i + 6, j, k) = t(6 * n3 + 2 * k) - a(i + 7, j, k) = t(6 * n3 + 2 * k + 1) - end do - end do - end do - else if (n1 .eq. 4) then - do j = 0, n2 - 1 - do k = 0, n3 - 1 - t(2 * k) = a(0, j, k) - t(2 * k + 1) = a(1, j, k) - t(2 * n3 + 2 * k) = a(2, j, k) - t(2 * n3 + 2 * k + 1) = a(3, j, k) - end do - call cdft(2 * n3, isgn, t, ip, w) - call cdft(2 * n3, isgn, t(2 * n3), ip, w) - do k = 0, n3 - 1 - a(0, j, k) = t(2 * k) - a(1, j, k) = t(2 * k + 1) - a(2, j, k) = t(2 * n3 + 2 * k) - a(3, j, k) = t(2 * n3 + 2 * k + 1) - end do - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - do k = 0, n3 - 1 - t(2 * k) = a(0, j, k) - t(2 * k + 1) = a(1, j, k) - end do - call cdft(2 * n3, isgn, t, ip, w) - do k = 0, n3 - 1 - a(0, j, k) = t(2 * k) - a(1, j, k) = t(2 * k + 1) - end do - end do - end if - end -! - subroutine rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) - integer n1max, n2max, n1, n2, n3, isgn, - & n2h, n3h, i, j, k, l - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), xi - n2h = n2 / 2 - n3h = n3 / 2 - if (isgn .lt. 0) then - do k = 1, n3h - 1 - l = n3 - k - xi = a(0, 0, k) - a(0, 0, l) - a(0, 0, k) = a(0, 0, k) + a(0, 0, l) - a(0, 0, l) = xi - xi = a(1, 0, l) - a(1, 0, k) - a(1, 0, k) = a(1, 0, k) + a(1, 0, l) - a(1, 0, l) = xi - xi = a(0, n2h, k) - a(0, n2h, l) - a(0, n2h, k) = a(0, n2h, k) + a(0, n2h, l) - a(0, n2h, l) = xi - xi = a(1, n2h, l) - a(1, n2h, k) - a(1, n2h, k) = a(1, n2h, k) + a(1, n2h, l) - a(1, n2h, l) = xi - do i = 1, n2h - 1 - j = n2 - i - xi = a(0, i, k) - a(0, j, l) - a(0, i, k) = a(0, i, k) + a(0, j, l) - a(0, j, l) = xi - xi = a(1, j, l) - a(1, i, k) - a(1, i, k) = a(1, i, k) + a(1, j, l) - a(1, j, l) = xi - xi = a(0, i, l) - a(0, j, k) - a(0, i, l) = a(0, i, l) + a(0, j, k) - a(0, j, k) = xi - xi = a(1, j, k) - a(1, i, l) - a(1, i, l) = a(1, i, l) + a(1, j, k) - a(1, j, k) = xi - end do - end do - do i = 1, n2h - 1 - j = n2 - i - xi = a(0, i, 0) - a(0, j, 0) - a(0, i, 0) = a(0, i, 0) + a(0, j, 0) - a(0, j, 0) = xi - xi = a(1, j, 0) - a(1, i, 0) - a(1, i, 0) = a(1, i, 0) + a(1, j, 0) - a(1, j, 0) = xi - xi = a(0, i, n3h) - a(0, j, n3h) - a(0, i, n3h) = a(0, i, n3h) + a(0, j, n3h) - a(0, j, n3h) = xi - xi = a(1, j, n3h) - a(1, i, n3h) - a(1, i, n3h) = a(1, i, n3h) + a(1, j, n3h) - a(1, j, n3h) = xi - end do - else - do k = 1, n3h - 1 - l = n3 - k - a(0, 0, l) = 0.5d0 * (a(0, 0, k) - a(0, 0, l)) - a(0, 0, k) = a(0, 0, k) - a(0, 0, l) - a(1, 0, l) = 0.5d0 * (a(1, 0, k) + a(1, 0, l)) - a(1, 0, k) = a(1, 0, k) - a(1, 0, l) - a(0, n2h, l) = 0.5d0 * (a(0, n2h, k) - a(0, n2h, l)) - a(0, n2h, k) = a(0, n2h, k) - a(0, n2h, l) - a(1, n2h, l) = 0.5d0 * (a(1, n2h, k) + a(1, n2h, l)) - a(1, n2h, k) = a(1, n2h, k) - a(1, n2h, l) - do i = 1, n2h - 1 - j = n2 - i - a(0, j, l) = 0.5d0 * (a(0, i, k) - a(0, j, l)) - a(0, i, k) = a(0, i, k) - a(0, j, l) - a(1, j, l) = 0.5d0 * (a(1, i, k) + a(1, j, l)) - a(1, i, k) = a(1, i, k) - a(1, j, l) - a(0, j, k) = 0.5d0 * (a(0, i, l) - a(0, j, k)) - a(0, i, l) = a(0, i, l) - a(0, j, k) - a(1, j, k) = 0.5d0 * (a(1, i, l) + a(1, j, k)) - a(1, i, l) = a(1, i, l) - a(1, j, k) - end do - end do - do i = 1, n2h - 1 - j = n2 - i - a(0, j, 0) = 0.5d0 * (a(0, i, 0) - a(0, j, 0)) - a(0, i, 0) = a(0, i, 0) - a(0, j, 0) - a(1, j, 0) = 0.5d0 * (a(1, i, 0) + a(1, j, 0)) - a(1, i, 0) = a(1, i, 0) - a(1, j, 0) - a(0, j, n3h) = 0.5d0 * (a(0, i, n3h) - a(0, j, n3h)) - a(0, i, n3h) = a(0, i, n3h) - a(0, j, n3h) - a(1, j, n3h) = 0.5d0 * (a(1, i, n3h) + a(1, j, n3h)) - a(1, i, n3h) = a(1, i, n3h) - a(1, j, n3h) - end do - end if - end -! - subroutine ddxt3da_sub(n1max, n2max, n1, n2, n3, ics, - & isgn, a, t, ip, w) - integer n1max, n2max, n1, n2, n3, ics, isgn, - & ip(0 : *), i, j, k - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - do k = 0, n3 - 1 - if (ics .eq. 0) then - do j = 0, n2 - 1 - call ddct(n1, isgn, a(0, j, k), ip, w) - end do - else - do j = 0, n2 - 1 - call ddst(n1, isgn, a(0, j, k), ip, w) - end do - end if - if (n1 .gt. 2) then - do i = 0, n1 - 4, 4 - do j = 0, n2 - 1 - t(j) = a(i, j, k) - t(n2 + j) = a(i + 1, j, k) - t(2 * n2 + j) = a(i + 2, j, k) - t(3 * n2 + j) = a(i + 3, j, k) - end do - if (ics .eq. 0) then - call ddct(n2, isgn, t, ip, w) - call ddct(n2, isgn, t(n2), ip, w) - call ddct(n2, isgn, t(2 * n2), ip, w) - call ddct(n2, isgn, t(3 * n2), ip, w) - else - call ddst(n2, isgn, t, ip, w) - call ddst(n2, isgn, t(n2), ip, w) - call ddst(n2, isgn, t(2 * n2), ip, w) - call ddst(n2, isgn, t(3 * n2), ip, w) - end if - do j = 0, n2 - 1 - a(i, j, k) = t(j) - a(i + 1, j, k) = t(n2 + j) - a(i + 2, j, k) = t(2 * n2 + j) - a(i + 3, j, k) = t(3 * n2 + j) - end do - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - t(j) = a(0, j, k) - t(n2 + j) = a(1, j, k) - end do - if (ics .eq. 0) then - call ddct(n2, isgn, t, ip, w) - call ddct(n2, isgn, t(n2), ip, w) - else - call ddst(n2, isgn, t, ip, w) - call ddst(n2, isgn, t(n2), ip, w) - end if - do j = 0, n2 - 1 - a(0, j, k) = t(j) - a(1, j, k) = t(n2 + j) - end do - end if - end do - end -! - subroutine ddxt3db_sub(n1max, n2max, n1, n2, n3, ics, - & isgn, a, t, ip, w) - integer n1max, n2max, n1, n2, n3, ics, isgn, - & ip(0 : *), i, j, k - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), - & t(0 : *), w(0 : *) - if (n1 .gt. 2) then - do j = 0, n2 - 1 - do i = 0, n1 - 4, 4 - do k = 0, n3 - 1 - t(k) = a(i, j, k) - t(n3 + k) = a(i + 1, j, k) - t(2 * n3 + k) = a(i + 2, j, k) - t(3 * n3 + k) = a(i + 3, j, k) - end do - if (ics .eq. 0) then - call ddct(n3, isgn, t, ip, w) - call ddct(n3, isgn, t(n3), ip, w) - call ddct(n3, isgn, t(2 * n3), ip, w) - call ddct(n3, isgn, t(3 * n3), ip, w) - else - call ddst(n3, isgn, t, ip, w) - call ddst(n3, isgn, t(n3), ip, w) - call ddst(n3, isgn, t(2 * n3), ip, w) - call ddst(n3, isgn, t(3 * n3), ip, w) - end if - do k = 0, n3 - 1 - a(i, j, k) = t(k) - a(i + 1, j, k) = t(n3 + k) - a(i + 2, j, k) = t(2 * n3 + k) - a(i + 3, j, k) = t(3 * n3 + k) - end do - end do - end do - else if (n1 .eq. 2) then - do j = 0, n2 - 1 - do k = 0, n3 - 1 - t(k) = a(0, j, k) - t(n3 + k) = a(1, j, k) - end do - if (ics .eq. 0) then - call ddct(n3, isgn, t, ip, w) - call ddct(n3, isgn, t(n3), ip, w) - else - call ddst(n3, isgn, t, ip, w) - call ddst(n3, isgn, t(n3), ip, w) - end if - do k = 0, n3 - 1 - a(0, j, k) = t(k) - a(1, j, k) = t(n3 + k) - end do - end do - end if - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/readme2d.txt b/third_party/tensorflow_dependencies/fft2d/readme2d.txt deleted file mode 100644 index 7cc4dfa..0000000 --- a/third_party/tensorflow_dependencies/fft2d/readme2d.txt +++ /dev/null
@@ -1,77 +0,0 @@ -General Purpose 2D,3D FFT (Fast Fourier Transform) Package - -Files - alloc.c : 2D-array Allocation - alloc.h : 2D-array Allocation - fft4f2d.c : 2D FFT Package in C - Version I (radix 4, 2) - fft4f2d.f : 2D FFT Package in Fortran - Version I (radix 4, 2) - fftsg.c : 1D FFT Package in C - Fast Version (Split-Radix) - fftsg.f : 1D FFT Package in Fortran - Fast Version (Split-Radix) - fftsg2d.c : 2D FFT Package in C - Version II (Split-Radix) - fftsg2d.f : 2D FFT Package in Fortran - Version II (Split-Radix) - fftsg3d.c : 3D FFT Package in C - Version II (Split-Radix) - fftsg3d.f : 3D FFT Package in Fortran - Version II (Split-Radix) - shrtdct.c : 8x8, 16x16 DCT Package - sample2d/ - Makefile : for gcc, cc - Makefile.f77: for Fortran - Makefile.pth: Pthread version - fft4f2dt.c : Test Program for "fft4f2d.c" - fft4f2dt.f : Test Program for "fft4f2d.f" - fftsg2dt.c : Test Program for "fftsg2d.c" - fftsg2dt.f : Test Program for "fftsg2d.f" - fftsg3dt.c : Test Program for "fftsg3d.c" - fftsg3dt.f : Test Program for "fftsg3d.f" - shrtdctt.c : Test Program for "shrtdct.c" - -Difference of Files - C and Fortran versions are equal and - the same routines are in each version. - ---- Difference between "fft4f2d.*" and "fftsg2d.*" ---- - "fft4f2d.*" are optimized for the old machines that - don't have the large size CPU cache. - "fftsg2d.*", "fftsg3d.*" use 1D FFT routines in "fftsg.*". - "fftsg2d.*", "fftsg3d.*" are optimized for the machines that - have the multi-level (L1,L2,etc) cache. - -Routines in the Package - in fft4f2d.*, fftsg2d.* - cdft2d: 2-dim Complex Discrete Fourier Transform - rdft2d: 2-dim Real Discrete Fourier Transform - ddct2d: 2-dim Discrete Cosine Transform - ddst2d: 2-dim Discrete Sine Transform - rdft2dsort: rdft2d input/output ordering (fftsg2d.*) - in fftsg3d.* - cdft3d: 3-dim Complex Discrete Fourier Transform - rdft3d: 3-dim Real Discrete Fourier Transform - ddct3d: 3-dim Discrete Cosine Transform - ddst3d: 3-dim Discrete Sine Transform - rdft3dsort: rdft3d input/output ordering - in fftsg.* - cdft: 1-dim Complex Discrete Fourier Transform - rdft: 1-dim Real Discrete Fourier Transform - ddct: 1-dim Discrete Cosine Transform - ddst: 1-dim Discrete Sine Transform - dfct: 1-dim Real Symmetric DFT - dfst: 1-dim Real Anti-symmetric DFT - (these routines are called by fftsg2d.*, fftsg3d.*) - in shrtdct.c - ddct8x8s : Normalized 8x8 DCT - ddct16x16s: Normalized 16x16 DCT - (faster than ddct2d()) - -Usage - Brief explanations are in block comments of each packages. - The examples are given in the test programs. - -Copyright - Copyright(C) 1997,2001 Takuya OOURA (email: ooura@kurims.kyoto-u.ac.jp). - You may use, copy, modify this code for any purpose and - without fee. You may distribute this ORIGINAL package. - -History - ... - Nov. 2001 : Add 3D-FFT routines - Dec. 2006 : Fix a documentation bug in "fftsg3d.*" - Dec. 2006 : Fix a minor bug in "fftsg.f" -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile b/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile deleted file mode 100644 index 130e0a4..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile +++ /dev/null
@@ -1,72 +0,0 @@ -# ---- for GNU gcc ---- - -CC = gcc - -CFLAGS = -Wall - -OFLAGS = -O2 - -# ---- for SUN WS cc ---- -# -#CC = cc -# -#CFLAGS = -# -#OFLAGS = -xO2 - - - - -all: fft4f2dt fftsg2dt fftsg3dt shrtdctt - - -fft4f2dt : fft4f2dt.o fft4f2d.o alloc.o - $(CC) fft4f2dt.o fft4f2d.o alloc.o -lm -o fft4f2dt - -fftsg2dt : fftsg2dt.o fftsg2d.o fftsg.o alloc.o - $(CC) fftsg2dt.o fftsg2d.o fftsg.o alloc.o -lm -o fftsg2dt - -fftsg3dt : fftsg3dt.o fftsg3d.o fftsg.o alloc.o - $(CC) fftsg3dt.o fftsg3d.o fftsg.o alloc.o -lm -o fftsg3dt - -shrtdctt : shrtdctt.o shrtdct.o - $(CC) shrtdctt.o shrtdct.o -lm -o shrtdctt - - -fft4f2dt.o : fft4f2dt.c - $(CC) $(CFLAGS) $(OFLAGS) -c fft4f2dt.c -o fft4f2dt.o - -fftsg2dt.o : fftsg2dt.c - $(CC) $(CFLAGS) $(OFLAGS) -c fftsg2dt.c -o fftsg2dt.o - -fftsg3dt.o : fftsg3dt.c - $(CC) $(CFLAGS) $(OFLAGS) -c fftsg3dt.c -o fftsg3dt.o - -shrtdctt.o : shrtdctt.c - $(CC) $(CFLAGS) $(OFLAGS) -c shrtdctt.c -o shrtdctt.o - - -fft4f2d.o : ../fft4f2d.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fft4f2d.c -o fft4f2d.o - -fftsg2d.o : ../fftsg2d.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg2d.c -o fftsg2d.o - -fftsg3d.o : ../fftsg3d.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg3d.c -o fftsg3d.o - -fftsg.o : ../fftsg.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg.c -o fftsg.o - -alloc.o : ../alloc.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../alloc.c -o alloc.o - -shrtdct.o : ../shrtdct.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../shrtdct.c -o shrtdct.o - - - - -clean: - rm -f *.o -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.f77 b/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.f77 deleted file mode 100644 index 0162fdc..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.f77 +++ /dev/null
@@ -1,60 +0,0 @@ -# ---- for GNU g77 ---- - -F77 = g77 - -FFLAGS = -Wall - -OFLAGS = -O2 - -# ---- for SUN WS f77 ---- -# -#F77 = f77 -# -#FFLAGS = -# -#OFLAGS = -xO2 - - - - -all: fft4f2dt_f fftsg2dt_f fftsg3dt_f - - -fft4f2dt_f : fft4f2dt_f.o fft4f2d_f.o - $(F77) fft4f2dt_f.o fft4f2d_f.o -o fft4f2dt_f - -fftsg2dt_f : fftsg2dt_f.o fftsg2d_f.o fftsg_f.o - $(F77) fftsg2dt_f.o fftsg2d_f.o fftsg_f.o -o fftsg2dt_f - -fftsg3dt_f : fftsg3dt_f.o fftsg3d_f.o fftsg_f.o - $(F77) fftsg3dt_f.o fftsg3d_f.o fftsg_f.o -o fftsg3dt_f - - -fft4f2dt_f.o : fft4f2dt.f - $(F77) $(FFLAGS) $(OFLAGS) -c fft4f2dt.f -o fft4f2dt_f.o - -fftsg2dt_f.o : fftsg2dt.f - $(F77) $(FFLAGS) $(OFLAGS) -c fftsg2dt.f -o fftsg2dt_f.o - -fftsg3dt_f.o : fftsg3dt.f - $(F77) $(FFLAGS) $(OFLAGS) -c fftsg3dt.f -o fftsg3dt_f.o - - -fft4f2d_f.o : ../fft4f2d.f - $(F77) $(FFLAGS) $(OFLAGS) -c ../fft4f2d.f -o fft4f2d_f.o - -fftsg2d_f.o : ../fftsg2d.f - $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg2d.f -o fftsg2d_f.o - -fftsg3d_f.o : ../fftsg3d.f - $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg3d.f -o fftsg3d_f.o - -fftsg_f.o : ../fftsg.f - $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg.f -o fftsg_f.o - - - - -clean: - rm -f *.o -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.pth b/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.pth deleted file mode 100644 index d46941e..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/Makefile.pth +++ /dev/null
@@ -1,54 +0,0 @@ -# ---- for GNU gcc ---- - -CC = gcc - -CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS - -OFLAGS = -O2 - -# ---- for SUN WS cc ---- -# -#CC = cc -# -#CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS -# -#OFLAGS = -xO2 - - - - -all: fftsg2dt_pt fftsg3dt_pt - - -fftsg2dt_pt : fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o - $(CC) fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg2dt_pt - -fftsg3dt_pt : fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o - $(CC) fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg3dt_pt - - -fftsg2dt.o : fftsg2dt.c - $(CC) $(CFLAGS) $(OFLAGS) -c fftsg2dt.c -o fftsg2dt.o - -fftsg3dt.o : fftsg3dt.c - $(CC) $(CFLAGS) $(OFLAGS) -c fftsg3dt.c -o fftsg3dt.o - - -fftsg2dpt.o : ../fftsg2d.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg2d.c -o fftsg2dpt.o - -fftsg3dpt.o : ../fftsg3d.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg3d.c -o fftsg3dpt.o - -fftsg.o : ../fftsg.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg.c -o fftsg.o - -alloc.o : ../alloc.c - $(CC) $(CFLAGS) $(OFLAGS) -c ../alloc.c -o alloc.o - - - - -clean: - rm -f *.o -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/alloc.h b/third_party/tensorflow_dependencies/fft2d/sample2d/alloc.h deleted file mode 100644 index 3467cc4..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/alloc.h +++ /dev/null
@@ -1,20 +0,0 @@ -/* ---- memory allocation ---- */ - - -#include <stdlib.h> -#include <stdio.h> - - -int *alloc_1d_int(int n1); -void free_1d_int(int *i); -double *alloc_1d_double(int n1); -void free_1d_double(double *d); -int **alloc_2d_int(int n1, int n2); -void free_2d_int(int **ii); -double **alloc_2d_double(int n1, int n2); -void free_2d_double(double **dd); -int ***alloc_3d_int(int n1, int n2, int n3); -void free_3d_int(int ***iii); -double ***alloc_3d_double(int n1, int n2, int n3); -void free_3d_double(double ***ddd); -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.c b/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.c deleted file mode 100644 index 2f300fd..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.c +++ /dev/null
@@ -1,109 +0,0 @@ -/* test of fft4f2d.c */ - -#include <math.h> -#include <stdio.h> -#include "alloc.h" -#define MAX(x,y) ((x) > (y) ? (x) : (y)) - -/* random number generator, 0 <= RND < 1 */ -#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) - - -int main() -{ - void cdft2d(int, int, int, double **, int *, double *); - void rdft2d(int, int, int, double **, int *, double *); - void ddct2d(int, int, int, double **, double **, int *, double *); - void ddst2d(int, int, int, double **, double **, int *, double *); - void putdata2d(int n1, int n2, double **a); - double errorcheck2d(int n1, int n2, double scale, double **a); - int *ip, n1, n2, n, i; - double **a, **t, *w, err; - - printf("data length n1=? (n1 = power of 2) \n"); - scanf("%d", &n1); - printf("data length n2=? (n2 = power of 2) \n"); - scanf("%d", &n2); - - a = alloc_2d_double(n1, n2); - t = alloc_2d_double(n1, n2); - n = MAX(n1, n2 / 2); - ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); - n = MAX(n1 / 2, n2 / 4) + MAX(n1, n2); - w = alloc_1d_double(n); - ip[0] = 0; - - /* check of CDFT */ - putdata2d(n1, n2, a); - cdft2d(n1, n2, 1, a, ip, w); - cdft2d(n1, n2, -1, a, ip, w); - err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); - printf("cdft2d err= %g \n", err); - - /* check of RDFT */ - putdata2d(n1, n2, a); - rdft2d(n1, n2, 1, a, ip, w); - rdft2d(n1, n2, -1, a, ip, w); - err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); - printf("rdft2d err= %g \n", err); - - /* check of DDCT */ - putdata2d(n1, n2, a); - ddct2d(n1, n2, 1, a, t, ip, w); - ddct2d(n1, n2, -1, a, t, ip, w); - for (i = 0; i <= n1 - 1; i++) { - a[i][0] *= 0.5; - } - for (i = 0; i <= n2 - 1; i++) { - a[0][i] *= 0.5; - } - err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); - printf("ddct2d err= %g \n", err); - - /* check of DDST */ - putdata2d(n1, n2, a); - ddst2d(n1, n2, 1, a, t, ip, w); - ddst2d(n1, n2, -1, a, t, ip, w); - for (i = 0; i <= n1 - 1; i++) { - a[i][0] *= 0.5; - } - for (i = 0; i <= n2 - 1; i++) { - a[0][i] *= 0.5; - } - err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); - printf("ddst2d err= %g \n", err); - - free_1d_double(w); - free_1d_int(ip); - free_2d_double(t); - free_2d_double(a); - return 0; -} - - -void putdata2d(int n1, int n2, double **a) -{ - int j1, j2, seed = 0; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] = RND(&seed); - } - } -} - - -double errorcheck2d(int n1, int n2, double scale, double **a) -{ - int j1, j2, seed = 0; - double err = 0, e; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - e = RND(&seed) - a[j1][j2] * scale; - err = MAX(err, fabs(e)); - } - } - return err; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.f b/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.f deleted file mode 100644 index 43ac8ac..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fft4f2dt.f +++ /dev/null
@@ -1,95 +0,0 @@ -! test of fft4f2d.f -! - program main - integer nmax, nmaxsqrt - parameter (nmax = 1024) - parameter (nmaxsqrt = 32) - integer ip(0 : nmaxsqrt + 1), n1, n2, i - real*8 a(0 : nmax - 1, 0 : nmax - 1), - & t(0 : nmax - 1, 0 : nmax - 1), w(0 : nmax * 3 / 2 - 1), - & err, errorcheck2d -! - write (*, *) 'data length n1=? (n1 = power of 2) ' - read (*, *) n1 - write (*, *) 'data length n2=? (n2 = power of 2) ' - read (*, *) n2 - ip(0) = 0 -! -! check of CDFT - call putdata2d(nmax, n1, n2, a) - call cdft2d(nmax, n1, n2, 1, a, ip, w) - call cdft2d(nmax, n1, n2, -1, a, ip, w) - err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) - write (*, *) 'cdft2d err= ', err -! -! check of RDFT - call putdata2d(nmax, n1, n2, a) - call rdft2d(nmax, n1, n2, 1, a, ip, w) - call rdft2d(nmax, n1, n2, -1, a, ip, w) - err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) - write (*, *) 'rdft2d err= ', err -! -! check of DDCT - call putdata2d(nmax, n1, n2, a) - call ddct2d(nmax, n1, n2, 1, a, t, ip, w) - call ddct2d(nmax, n1, n2, -1, a, t, ip, w) - do i = 0, n1 - 1 - a(i, 0) = a(i, 0) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i) = a(0, i) * 0.5d0 - end do - err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) - write (*, *) 'ddct2d err= ', err -! -! check of DDST - call putdata2d(nmax, n1, n2, a) - call ddst2d(nmax, n1, n2, 1, a, t, ip, w) - call ddst2d(nmax, n1, n2, -1, a, t, ip, w) - do i = 0, n1 - 1 - a(i, 0) = a(i, 0) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i) = a(0, i) * 0.5d0 - end do - err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) - write (*, *) 'ddst2d err= ', err -! - end -! -! - subroutine putdata2d(n1max, n1, n2, a) - integer n1max, n1, n2, j1, j2, seed - real*8 a(0 : n1max - 1, 0 : *), drnd - seed = 0 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - a(j1, j2) = drnd(seed) - end do - end do - end -! -! - function errorcheck2d(n1max, n1, n2, scale, a) - integer n1max, n1, n2, j1, j2, seed - real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, - & errorcheck2d - err = 0 - seed = 0 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - e = drnd(seed) - a(j1, j2) * scale - err = max(err, abs(e)) - end do - end do - errorcheck2d = err - end -! -! -! random number generator, 0 <= drnd < 1 - real*8 function drnd(seed) - integer seed - seed = mod(seed * 7141 + 54773, 259200) - drnd = seed * (1.0d0 / 259200) - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.c b/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.c deleted file mode 100644 index c4cf935..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.c +++ /dev/null
@@ -1,107 +0,0 @@ -/* test of fftsg2d.c */ - -#include <math.h> -#include <stdio.h> -#include "alloc.h" -#define MAX(x,y) ((x) > (y) ? (x) : (y)) - -/* random number generator, 0 <= RND < 1 */ -#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) - - -int main() -{ - void cdft2d(int, int, int, double **, double *, int *, double *); - void rdft2d(int, int, int, double **, double *, int *, double *); - void ddct2d(int, int, int, double **, double *, int *, double *); - void ddst2d(int, int, int, double **, double *, int *, double *); - void putdata2d(int n1, int n2, double **a); - double errorcheck2d(int n1, int n2, double scale, double **a); - int *ip, n1, n2, n, i; - double **a, *w, err; - - printf("data length n1=? (n1 = power of 2) \n"); - scanf("%d", &n1); - printf("data length n2=? (n2 = power of 2) \n"); - scanf("%d", &n2); - - a = alloc_2d_double(n1, n2); - n = MAX(n1, n2 / 2); - ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); - n = MAX(n1, n2) * 3 / 2; - w = alloc_1d_double(n); - ip[0] = 0; - - /* check of CDFT */ - putdata2d(n1, n2, a); - cdft2d(n1, n2, 1, a, NULL, ip, w); - cdft2d(n1, n2, -1, a, NULL, ip, w); - err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); - printf("cdft2d err= %g \n", err); - - /* check of RDFT */ - putdata2d(n1, n2, a); - rdft2d(n1, n2, 1, a, NULL, ip, w); - rdft2d(n1, n2, -1, a, NULL, ip, w); - err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); - printf("rdft2d err= %g \n", err); - - /* check of DDCT */ - putdata2d(n1, n2, a); - ddct2d(n1, n2, 1, a, NULL, ip, w); - ddct2d(n1, n2, -1, a, NULL, ip, w); - for (i = 0; i <= n1 - 1; i++) { - a[i][0] *= 0.5; - } - for (i = 0; i <= n2 - 1; i++) { - a[0][i] *= 0.5; - } - err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); - printf("ddct2d err= %g \n", err); - - /* check of DDST */ - putdata2d(n1, n2, a); - ddst2d(n1, n2, 1, a, NULL, ip, w); - ddst2d(n1, n2, -1, a, NULL, ip, w); - for (i = 0; i <= n1 - 1; i++) { - a[i][0] *= 0.5; - } - for (i = 0; i <= n2 - 1; i++) { - a[0][i] *= 0.5; - } - err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); - printf("ddst2d err= %g \n", err); - - free_1d_double(w); - free_1d_int(ip); - free_2d_double(a); - return 0; -} - - -void putdata2d(int n1, int n2, double **a) -{ - int j1, j2, seed = 0; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] = RND(&seed); - } - } -} - - -double errorcheck2d(int n1, int n2, double scale, double **a) -{ - int j1, j2, seed = 0; - double err = 0, e; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - e = RND(&seed) - a[j1][j2] * scale; - err = MAX(err, fabs(e)); - } - } - return err; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.f b/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.f deleted file mode 100644 index a375c05..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg2dt.f +++ /dev/null
@@ -1,94 +0,0 @@ -! test of fftsg2d.f -! - program main - integer nmax, nmaxsqrt - parameter (nmax = 1024) - parameter (nmaxsqrt = 32) - integer ip(0 : nmaxsqrt + 1), n1, n2, i - real*8 a(0 : nmax - 1, 0 : nmax - 1), t(0 : 8 * nmax - 1), - & w(0 : nmax * 3 / 2 - 1), err, errorcheck2d -! - write (*, *) 'data length n1=? (n1 = power of 2) ' - read (*, *) n1 - write (*, *) 'data length n2=? (n2 = power of 2) ' - read (*, *) n2 - ip(0) = 0 -! -! check of CDFT - call putdata2d(nmax, n1, n2, a) - call cdft2d(nmax, n1, n2, 1, a, t, ip, w) - call cdft2d(nmax, n1, n2, -1, a, t, ip, w) - err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) - write (*, *) 'cdft2d err= ', err -! -! check of RDFT - call putdata2d(nmax, n1, n2, a) - call rdft2d(nmax, n1, n2, 1, a, t, ip, w) - call rdft2d(nmax, n1, n2, -1, a, t, ip, w) - err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) - write (*, *) 'rdft2d err= ', err -! -! check of DDCT - call putdata2d(nmax, n1, n2, a) - call ddct2d(nmax, n1, n2, 1, a, t, ip, w) - call ddct2d(nmax, n1, n2, -1, a, t, ip, w) - do i = 0, n1 - 1 - a(i, 0) = a(i, 0) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i) = a(0, i) * 0.5d0 - end do - err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) - write (*, *) 'ddct2d err= ', err -! -! check of DDST - call putdata2d(nmax, n1, n2, a) - call ddst2d(nmax, n1, n2, 1, a, t, ip, w) - call ddst2d(nmax, n1, n2, -1, a, t, ip, w) - do i = 0, n1 - 1 - a(i, 0) = a(i, 0) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i) = a(0, i) * 0.5d0 - end do - err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) - write (*, *) 'ddst2d err= ', err -! - end -! -! - subroutine putdata2d(n1max, n1, n2, a) - integer n1max, n1, n2, j1, j2, seed - real*8 a(0 : n1max - 1, 0 : *), drnd - seed = 0 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - a(j1, j2) = drnd(seed) - end do - end do - end -! -! - function errorcheck2d(n1max, n1, n2, scale, a) - integer n1max, n1, n2, j1, j2, seed - real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, - & errorcheck2d - err = 0 - seed = 0 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - e = drnd(seed) - a(j1, j2) * scale - err = max(err, abs(e)) - end do - end do - errorcheck2d = err - end -! -! -! random number generator, 0 <= drnd < 1 - real*8 function drnd(seed) - integer seed - seed = mod(seed * 7141 + 54773, 259200) - drnd = seed * (1.0d0 / 259200) - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.c b/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.c deleted file mode 100644 index 87879e6..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.c +++ /dev/null
@@ -1,128 +0,0 @@ -/* test of fftsg3d.c */ - -#include <math.h> -#include <stdio.h> -#include "alloc.h" -#define MAX(x,y) ((x) > (y) ? (x) : (y)) - -/* random number generator, 0 <= RND < 1 */ -#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) - - -int main() -{ - void cdft3d(int, int, int, int, double ***, double *, int *, double *); - void rdft3d(int, int, int, int, double ***, double *, int *, double *); - void ddct3d(int, int, int, int, double ***, double *, int *, double *); - void ddst3d(int, int, int, int, double ***, double *, int *, double *); - void putdata3d(int n1, int n2, int n3, double ***a); - double errorcheck3d(int n1, int n2, int n3, double scale, double ***a); - int *ip, n1, n2, n3, n, nt, i, j; - double ***a, *w, err; - - printf("data length n1=? (n1 = power of 2) \n"); - scanf("%d", &n1); - printf("data length n2=? (n2 = power of 2) \n"); - scanf("%d", &n2); - printf("data length n3=? (n3 = power of 2) \n"); - scanf("%d", &n3); - - a = alloc_3d_double(n1, n2, n3); - nt = MAX(n1, n2); - n = MAX(nt, n3 / 2); - ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); - n = MAX(nt, n3) * 3 / 2; - w = alloc_1d_double(n); - ip[0] = 0; - - /* check of CDFT */ - putdata3d(n1, n2, n3, a); - cdft3d(n1, n2, n3, 1, a, NULL, ip, w); - cdft3d(n1, n2, n3, -1, a, NULL, ip, w); - err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); - printf("cdft3d err= %g \n", err); - - /* check of RDFT */ - putdata3d(n1, n2, n3, a); - rdft3d(n1, n2, n3, 1, a, NULL, ip, w); - rdft3d(n1, n2, n3, -1, a, NULL, ip, w); - err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); - printf("rdft3d err= %g \n", err); - - /* check of DDCT */ - putdata3d(n1, n2, n3, a); - ddct3d(n1, n2, n3, 1, a, NULL, ip, w); - ddct3d(n1, n2, n3, -1, a, NULL, ip, w); - for (i = 0; i <= n1 - 1; i++) { - for (j = 0; j <= n2 - 1; j++) { - a[i][j][0] *= 0.5; - } - for (j = 0; j <= n3 - 1; j++) { - a[i][0][j] *= 0.5; - } - } - for (i = 0; i <= n2 - 1; i++) { - for (j = 0; j <= n3 - 1; j++) { - a[0][i][j] *= 0.5; - } - } - err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); - printf("ddct3d err= %g \n", err); - - /* check of DDST */ - putdata3d(n1, n2, n3, a); - ddst3d(n1, n2, n3, 1, a, NULL, ip, w); - ddst3d(n1, n2, n3, -1, a, NULL, ip, w); - for (i = 0; i <= n1 - 1; i++) { - for (j = 0; j <= n2 - 1; j++) { - a[i][j][0] *= 0.5; - } - for (j = 0; j <= n3 - 1; j++) { - a[i][0][j] *= 0.5; - } - } - for (i = 0; i <= n2 - 1; i++) { - for (j = 0; j <= n3 - 1; j++) { - a[0][i][j] *= 0.5; - } - } - err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); - printf("ddst3d err= %g \n", err); - - free_1d_double(w); - free_1d_int(ip); - free_3d_double(a); - return 0; -} - - -void putdata3d(int n1, int n2, int n3, double ***a) -{ - int j1, j2, j3, seed = 0; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - a[j1][j2][j3] = RND(&seed); - } - } - } -} - - -double errorcheck3d(int n1, int n2, int n3, double scale, double ***a) -{ - int j1, j2, j3, seed = 0; - double err = 0, e; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - for (j3 = 0; j3 <= n3 - 1; j3++) { - e = RND(&seed) - a[j1][j2][j3] * scale; - err = MAX(err, fabs(e)); - } - } - } - return err; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.f b/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.f deleted file mode 100644 index bbada0d..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/fftsg3dt.f +++ /dev/null
@@ -1,119 +0,0 @@ -! test of fftsg3d.f -! - program main - integer nmax, nmaxsqrt - parameter (nmax = 128) - parameter (nmaxsqrt = 16) - integer ip(0 : nmaxsqrt + 1), n1, n2, n3, i, j - real*8 a(0 : nmax - 1, 0 : nmax - 1, 0 : nmax - 1), - & t(0 : 8 * nmax - 1), - & w(0 : nmax * 3 / 2 - 1), err, errorcheck3d -! - write (*, *) 'data length n1=? (n1 = power of 2) ' - read (*, *) n1 - write (*, *) 'data length n2=? (n2 = power of 2) ' - read (*, *) n2 - write (*, *) 'data length n3=? (n3 = power of 2) ' - read (*, *) n3 - ip(0) = 0 -! -! check of CDFT - call putdata3d(nmax, nmax, n1, n2, n3, a) - call cdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) - call cdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) - err = errorcheck3d(nmax, nmax, n1, n2, n3, - & 2.0d0 / n1 / n2 / n3, a) - write (*, *) 'cdft3d err= ', err -! -! check of RDFT - call putdata3d(nmax, nmax, n1, n2, n3, a) - call rdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) - call rdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) - err = errorcheck3d(nmax, nmax, n1, n2, n3, - & 2.0d0 / n1 / n2 / n3, a) - write (*, *) 'rdft3d err= ', err -! -! check of DDCT - call putdata3d(nmax, nmax, n1, n2, n3, a) - call ddct3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) - call ddct3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) - do j = 0, n2 - 1 - do i = 0, n1 - 1 - a(i, j, 0) = a(i, j, 0) * 0.5d0 - end do - end do - do j = 0, n3 - 1 - do i = 0, n1 - 1 - a(i, 0, j) = a(i, 0, j) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i, j) = a(0, i, j) * 0.5d0 - end do - end do - err = errorcheck3d(nmax, nmax, n1, n2, n3, - & 8.0d0 / n1 / n2 / n3, a) - write (*, *) 'ddct3d err= ', err -! -! check of DDST - call putdata3d(nmax, nmax, n1, n2, n3, a) - call ddst3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) - call ddst3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) - do j = 0, n2 - 1 - do i = 0, n1 - 1 - a(i, j, 0) = a(i, j, 0) * 0.5d0 - end do - end do - do j = 0, n3 - 1 - do i = 0, n1 - 1 - a(i, 0, j) = a(i, 0, j) * 0.5d0 - end do - do i = 0, n2 - 1 - a(0, i, j) = a(0, i, j) * 0.5d0 - end do - end do - err = errorcheck3d(nmax, nmax, n1, n2, n3, - & 8.0d0 / n1 / n2 / n3, a) - write (*, *) 'ddst3d err= ', err -! - end -! -! - subroutine putdata3d(n1max, n2max, n1, n2, n3, a) - integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed - real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : *), drnd - seed = 0 - do j3 = 0, n3 - 1 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - a(j1, j2, j3) = drnd(seed) - end do - end do - end do - end -! -! - function errorcheck3d(n1max, n2max, n1, n2, n3, scale, a) - integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed - real*8 scale, a(0 : n1max - 1, 0 : n2max - 1, 0 : *), - & drnd, err, e, errorcheck3d - err = 0 - seed = 0 - do j3 = 0, n3 - 1 - do j2 = 0, n2 - 1 - do j1 = 0, n1 - 1 - e = drnd(seed) - a(j1, j2, j3) * scale - err = max(err, abs(e)) - end do - end do - end do - errorcheck3d = err - end -! -! -! random number generator, 0 <= drnd < 1 - real*8 function drnd(seed) - integer seed - seed = mod(seed * 7141 + 54773, 259200) - drnd = seed * (1.0d0 / 259200) - end -!
diff --git a/third_party/tensorflow_dependencies/fft2d/sample2d/shrtdctt.c b/third_party/tensorflow_dependencies/fft2d/sample2d/shrtdctt.c deleted file mode 100644 index 70d665f..0000000 --- a/third_party/tensorflow_dependencies/fft2d/sample2d/shrtdctt.c +++ /dev/null
@@ -1,68 +0,0 @@ -/* test of shrtdct.c */ - -#include <math.h> -#include <stdio.h> -#define MAX(x,y) ((x) > (y) ? (x) : (y)) - -/* random number generator, 0 <= RND < 1 */ -#define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) - -#define NMAX 16 - -int main() -{ - void ddct8x8s(int isgn, double **a); - void ddct16x16s(int isgn, double **a); - void putdata2d(int n1, int n2, double **a); - double errorcheck2d(int n1, int n2, double scale, double **a); - double err; - - int i; - double aarr[NMAX][NMAX], *a[NMAX], barr[NMAX][NMAX], *b[NMAX]; - for (i = 0; i < NMAX; i++) a[i] = aarr[i]; - for (i = 0; i < NMAX; i++) b[i] = barr[i]; - - /* check of 8x8 DCT */ - putdata2d(8, 8, a); - ddct8x8s(-1, a); - ddct8x8s(1, a); - err = errorcheck2d(8, 8, 1.0, a); - printf("ddct8x8s err= %g\n", err); - - /* check of 16x16 DCT */ - putdata2d(16, 16, a); - ddct16x16s(-1, a); - ddct16x16s(1, a); - err = errorcheck2d(16, 16, 1.0, a); - printf("ddct16x16s err= %g\n", err); - - return 0; -} - - -void putdata2d(int n1, int n2, double **a) -{ - int j1, j2, seed = 0; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - a[j1][j2] = RND(&seed); - } - } -} - - -double errorcheck2d(int n1, int n2, double scale, double **a) -{ - int j1, j2, seed = 0; - double err = 0, e; - - for (j1 = 0; j1 <= n1 - 1; j1++) { - for (j2 = 0; j2 <= n2 - 1; j2++) { - e = RND(&seed) - a[j1][j2] * scale; - err = MAX(err, fabs(e)); - } - } - return err; -} -
diff --git a/third_party/tensorflow_dependencies/fft2d/shrtdct.c b/third_party/tensorflow_dependencies/fft2d/shrtdct.c deleted file mode 100644 index 455cb4c..0000000 --- a/third_party/tensorflow_dependencies/fft2d/shrtdct.c +++ /dev/null
@@ -1,538 +0,0 @@ -/* -Short Discrete Cosine Transform - data length :8x8, 16x16 - method :row-column, radix 4 FFT -functions - ddct8x8s : 8x8 DCT - ddct16x16s: 16x16 DCT -function prototypes - void ddct8x8s(int isgn, double **a); - void ddct16x16s(int isgn, double **a); -*/ - - -/* --------- 8x8 DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> Normalized 8x8 IDCT - C[k1][k2] = (1/4) * sum_j1=0^7 sum_j2=0^7 - a[j1][j2] * s[j1] * s[j2] * - cos(pi*j1*(k1+1/2)/8) * - cos(pi*j2*(k2+1/2)/8), 0<=k1<8, 0<=k2<8 - (s[0] = 1/sqrt(2), s[j] = 1, j > 0) - <case2> Normalized 8x8 DCT - C[k1][k2] = (1/4) * s[k1] * s[k2] * sum_j1=0^7 sum_j2=0^7 - a[j1][j2] * - cos(pi*(j1+1/2)*k1/8) * - cos(pi*(j2+1/2)*k2/8), 0<=k1<8, 0<=k2<8 - (s[0] = 1/sqrt(2), s[j] = 1, j > 0) - [usage] - <case1> - ddct8x8s(1, a); - <case2> - ddct8x8s(-1, a); - [parameters] - a[0...7][0...7] :input/output data (double **) - output data - a[k1][k2] = C[k1][k2], 0<=k1<8, 0<=k2<8 -*/ - - -/* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ -/* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ -/* Wn_kR = cos(pi/2*k/n) */ -/* Wn_kI = sin(pi/2*k/n) */ -#define C8_1R 0.49039264020161522456 -#define C8_1I 0.09754516100806413392 -#define C8_2R 0.46193976625564337806 -#define C8_2I 0.19134171618254488586 -#define C8_3R 0.41573480615127261854 -#define C8_3I 0.27778511650980111237 -#define C8_4R 0.35355339059327376220 -#define W8_4R 0.70710678118654752440 - - -void ddct8x8s(int isgn, double **a) -{ - int j; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - double xr, xi; - - if (isgn < 0) { - for (j = 0; j <= 7; j++) { - x0r = a[0][j] + a[7][j]; - x1r = a[0][j] - a[7][j]; - x0i = a[2][j] + a[5][j]; - x1i = a[2][j] - a[5][j]; - x2r = a[4][j] + a[3][j]; - x3r = a[4][j] - a[3][j]; - x2i = a[6][j] + a[1][j]; - x3i = a[6][j] - a[1][j]; - xr = x0r + x2r; - xi = x0i + x2i; - a[0][j] = C8_4R * (xr + xi); - a[4][j] = C8_4R * (xr - xi); - xr = x0r - x2r; - xi = x0i - x2i; - a[2][j] = C8_2R * xr - C8_2I * xi; - a[6][j] = C8_2R * xi + C8_2I * xr; - xr = W8_4R * (x1i - x3i); - x1i = W8_4R * (x1i + x3i); - x3i = x1i - x3r; - x1i += x3r; - x3r = x1r - xr; - x1r += xr; - a[1][j] = C8_1R * x1r - C8_1I * x1i; - a[7][j] = C8_1R * x1i + C8_1I * x1r; - a[3][j] = C8_3R * x3r - C8_3I * x3i; - a[5][j] = C8_3R * x3i + C8_3I * x3r; - } - for (j = 0; j <= 7; j++) { - x0r = a[j][0] + a[j][7]; - x1r = a[j][0] - a[j][7]; - x0i = a[j][2] + a[j][5]; - x1i = a[j][2] - a[j][5]; - x2r = a[j][4] + a[j][3]; - x3r = a[j][4] - a[j][3]; - x2i = a[j][6] + a[j][1]; - x3i = a[j][6] - a[j][1]; - xr = x0r + x2r; - xi = x0i + x2i; - a[j][0] = C8_4R * (xr + xi); - a[j][4] = C8_4R * (xr - xi); - xr = x0r - x2r; - xi = x0i - x2i; - a[j][2] = C8_2R * xr - C8_2I * xi; - a[j][6] = C8_2R * xi + C8_2I * xr; - xr = W8_4R * (x1i - x3i); - x1i = W8_4R * (x1i + x3i); - x3i = x1i - x3r; - x1i += x3r; - x3r = x1r - xr; - x1r += xr; - a[j][1] = C8_1R * x1r - C8_1I * x1i; - a[j][7] = C8_1R * x1i + C8_1I * x1r; - a[j][3] = C8_3R * x3r - C8_3I * x3i; - a[j][5] = C8_3R * x3i + C8_3I * x3r; - } - } else { - for (j = 0; j <= 7; j++) { - x1r = C8_1R * a[1][j] + C8_1I * a[7][j]; - x1i = C8_1R * a[7][j] - C8_1I * a[1][j]; - x3r = C8_3R * a[3][j] + C8_3I * a[5][j]; - x3i = C8_3R * a[5][j] - C8_3I * a[3][j]; - xr = x1r - x3r; - xi = x1i + x3i; - x1r += x3r; - x3i -= x1i; - x1i = W8_4R * (xr + xi); - x3r = W8_4R * (xr - xi); - xr = C8_2R * a[2][j] + C8_2I * a[6][j]; - xi = C8_2R * a[6][j] - C8_2I * a[2][j]; - x0r = C8_4R * (a[0][j] + a[4][j]); - x0i = C8_4R * (a[0][j] - a[4][j]); - x2r = x0r - xr; - x2i = x0i - xi; - x0r += xr; - x0i += xi; - a[0][j] = x0r + x1r; - a[7][j] = x0r - x1r; - a[2][j] = x0i + x1i; - a[5][j] = x0i - x1i; - a[4][j] = x2r - x3i; - a[3][j] = x2r + x3i; - a[6][j] = x2i - x3r; - a[1][j] = x2i + x3r; - } - for (j = 0; j <= 7; j++) { - x1r = C8_1R * a[j][1] + C8_1I * a[j][7]; - x1i = C8_1R * a[j][7] - C8_1I * a[j][1]; - x3r = C8_3R * a[j][3] + C8_3I * a[j][5]; - x3i = C8_3R * a[j][5] - C8_3I * a[j][3]; - xr = x1r - x3r; - xi = x1i + x3i; - x1r += x3r; - x3i -= x1i; - x1i = W8_4R * (xr + xi); - x3r = W8_4R * (xr - xi); - xr = C8_2R * a[j][2] + C8_2I * a[j][6]; - xi = C8_2R * a[j][6] - C8_2I * a[j][2]; - x0r = C8_4R * (a[j][0] + a[j][4]); - x0i = C8_4R * (a[j][0] - a[j][4]); - x2r = x0r - xr; - x2i = x0i - xi; - x0r += xr; - x0i += xi; - a[j][0] = x0r + x1r; - a[j][7] = x0r - x1r; - a[j][2] = x0i + x1i; - a[j][5] = x0i - x1i; - a[j][4] = x2r - x3i; - a[j][3] = x2r + x3i; - a[j][6] = x2i - x3r; - a[j][1] = x2i + x3r; - } - } -} - - - -/* --------- 16x16 DCT (Discrete Cosine Transform) / Inverse of DCT -------- - [definition] - <case1> Normalized 16x16 IDCT - C[k1][k2] = (1/8) * sum_j1=0^15 sum_j2=0^15 - a[j1][j2] * s[j1] * s[j2] * - cos(pi*j1*(k1+1/2)/16) * - cos(pi*j2*(k2+1/2)/16), 0<=k1<16, 0<=k2<16 - (s[0] = 1/sqrt(2), s[j] = 1, j > 0) - <case2> Normalized 16x16 DCT - C[k1][k2] = (1/8) * s[k1] * s[k2] * sum_j1=0^15 sum_j2=0^15 - a[j1][j2] * - cos(pi*(j1+1/2)*k1/16) * - cos(pi*(j2+1/2)*k2/16), 0<=k1<16, 0<=k2<16 - (s[0] = 1/sqrt(2), s[j] = 1, j > 0) - [usage] - <case1> - ddct16x16s(1, a); - <case2> - ddct16x16s(-1, a); - [parameters] - a[0...15][0...15] :input/output data (double **) - output data - a[k1][k2] = C[k1][k2], 0<=k1<16, 0<=k2<16 -*/ - - -/* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ -/* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ -/* Wn_kR = cos(pi/2*k/n) */ -/* Wn_kI = sin(pi/2*k/n) */ -#define C16_1R 0.35185093438159561476 -#define C16_1I 0.03465429229977286565 -#define C16_2R 0.34675996133053686546 -#define C16_2I 0.06897484482073575308 -#define C16_3R 0.33832950029358816957 -#define C16_3I 0.10263113188058934529 -#define C16_4R 0.32664074121909413196 -#define C16_4I 0.13529902503654924610 -#define C16_5R 0.31180625324666780814 -#define C16_5I 0.16666391461943662432 -#define C16_6R 0.29396890060483967924 -#define C16_6I 0.19642373959677554532 -#define C16_7R 0.27330046675043937206 -#define C16_7I 0.22429189658565907106 -#define C16_8R 0.25 -#define W16_4R 0.92387953251128675613 -#define W16_4I 0.38268343236508977173 -#define W16_8R 0.70710678118654752440 - - -void ddct16x16s(int isgn, double **a) -{ - int j; - double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - double x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; - double xr, xi; - - if (isgn < 0) { - for (j = 0; j <= 15; j++) { - x4r = a[0][j] - a[15][j]; - xr = a[0][j] + a[15][j]; - x4i = a[8][j] - a[7][j]; - xi = a[8][j] + a[7][j]; - x0r = xr + xi; - x0i = xr - xi; - x5r = a[2][j] - a[13][j]; - xr = a[2][j] + a[13][j]; - x5i = a[10][j] - a[5][j]; - xi = a[10][j] + a[5][j]; - x1r = xr + xi; - x1i = xr - xi; - x6r = a[4][j] - a[11][j]; - xr = a[4][j] + a[11][j]; - x6i = a[12][j] - a[3][j]; - xi = a[12][j] + a[3][j]; - x2r = xr + xi; - x2i = xr - xi; - x7r = a[6][j] - a[9][j]; - xr = a[6][j] + a[9][j]; - x7i = a[14][j] - a[1][j]; - xi = a[14][j] + a[1][j]; - x3r = xr + xi; - x3i = xr - xi; - xr = x0r + x2r; - xi = x1r + x3r; - a[0][j] = C16_8R * (xr + xi); - a[8][j] = C16_8R * (xr - xi); - xr = x0r - x2r; - xi = x1r - x3r; - a[4][j] = C16_4R * xr - C16_4I * xi; - a[12][j] = C16_4R * xi + C16_4I * xr; - x0r = W16_8R * (x1i - x3i); - x2r = W16_8R * (x1i + x3i); - xr = x0i + x0r; - xi = x2r + x2i; - a[2][j] = C16_2R * xr - C16_2I * xi; - a[14][j] = C16_2R * xi + C16_2I * xr; - xr = x0i - x0r; - xi = x2r - x2i; - a[6][j] = C16_6R * xr - C16_6I * xi; - a[10][j] = C16_6R * xi + C16_6I * xr; - xr = W16_8R * (x6r - x6i); - xi = W16_8R * (x6i + x6r); - x6r = x4r - xr; - x6i = x4i - xi; - x4r += xr; - x4i += xi; - xr = W16_4I * x7r - W16_4R * x7i; - xi = W16_4I * x7i + W16_4R * x7r; - x7r = W16_4R * x5r - W16_4I * x5i; - x7i = W16_4R * x5i + W16_4I * x5r; - x5r = x7r + xr; - x5i = x7i + xi; - x7r -= xr; - x7i -= xi; - xr = x4r + x5r; - xi = x5i + x4i; - a[1][j] = C16_1R * xr - C16_1I * xi; - a[15][j] = C16_1R * xi + C16_1I * xr; - xr = x4r - x5r; - xi = x5i - x4i; - a[7][j] = C16_7R * xr - C16_7I * xi; - a[9][j] = C16_7R * xi + C16_7I * xr; - xr = x6r - x7i; - xi = x7r + x6i; - a[5][j] = C16_5R * xr - C16_5I * xi; - a[11][j] = C16_5R * xi + C16_5I * xr; - xr = x6r + x7i; - xi = x7r - x6i; - a[3][j] = C16_3R * xr - C16_3I * xi; - a[13][j] = C16_3R * xi + C16_3I * xr; - } - for (j = 0; j <= 15; j++) { - x4r = a[j][0] - a[j][15]; - xr = a[j][0] + a[j][15]; - x4i = a[j][8] - a[j][7]; - xi = a[j][8] + a[j][7]; - x0r = xr + xi; - x0i = xr - xi; - x5r = a[j][2] - a[j][13]; - xr = a[j][2] + a[j][13]; - x5i = a[j][10] - a[j][5]; - xi = a[j][10] + a[j][5]; - x1r = xr + xi; - x1i = xr - xi; - x6r = a[j][4] - a[j][11]; - xr = a[j][4] + a[j][11]; - x6i = a[j][12] - a[j][3]; - xi = a[j][12] + a[j][3]; - x2r = xr + xi; - x2i = xr - xi; - x7r = a[j][6] - a[j][9]; - xr = a[j][6] + a[j][9]; - x7i = a[j][14] - a[j][1]; - xi = a[j][14] + a[j][1]; - x3r = xr + xi; - x3i = xr - xi; - xr = x0r + x2r; - xi = x1r + x3r; - a[j][0] = C16_8R * (xr + xi); - a[j][8] = C16_8R * (xr - xi); - xr = x0r - x2r; - xi = x1r - x3r; - a[j][4] = C16_4R * xr - C16_4I * xi; - a[j][12] = C16_4R * xi + C16_4I * xr; - x0r = W16_8R * (x1i - x3i); - x2r = W16_8R * (x1i + x3i); - xr = x0i + x0r; - xi = x2r + x2i; - a[j][2] = C16_2R * xr - C16_2I * xi; - a[j][14] = C16_2R * xi + C16_2I * xr; - xr = x0i - x0r; - xi = x2r - x2i; - a[j][6] = C16_6R * xr - C16_6I * xi; - a[j][10] = C16_6R * xi + C16_6I * xr; - xr = W16_8R * (x6r - x6i); - xi = W16_8R * (x6i + x6r); - x6r = x4r - xr; - x6i = x4i - xi; - x4r += xr; - x4i += xi; - xr = W16_4I * x7r - W16_4R * x7i; - xi = W16_4I * x7i + W16_4R * x7r; - x7r = W16_4R * x5r - W16_4I * x5i; - x7i = W16_4R * x5i + W16_4I * x5r; - x5r = x7r + xr; - x5i = x7i + xi; - x7r -= xr; - x7i -= xi; - xr = x4r + x5r; - xi = x5i + x4i; - a[j][1] = C16_1R * xr - C16_1I * xi; - a[j][15] = C16_1R * xi + C16_1I * xr; - xr = x4r - x5r; - xi = x5i - x4i; - a[j][7] = C16_7R * xr - C16_7I * xi; - a[j][9] = C16_7R * xi + C16_7I * xr; - xr = x6r - x7i; - xi = x7r + x6i; - a[j][5] = C16_5R * xr - C16_5I * xi; - a[j][11] = C16_5R * xi + C16_5I * xr; - xr = x6r + x7i; - xi = x7r - x6i; - a[j][3] = C16_3R * xr - C16_3I * xi; - a[j][13] = C16_3R * xi + C16_3I * xr; - } - } else { - for (j = 0; j <= 15; j++) { - x5r = C16_1R * a[1][j] + C16_1I * a[15][j]; - x5i = C16_1R * a[15][j] - C16_1I * a[1][j]; - xr = C16_7R * a[7][j] + C16_7I * a[9][j]; - xi = C16_7R * a[9][j] - C16_7I * a[7][j]; - x4r = x5r + xr; - x4i = x5i - xi; - x5r -= xr; - x5i += xi; - x7r = C16_5R * a[5][j] + C16_5I * a[11][j]; - x7i = C16_5R * a[11][j] - C16_5I * a[5][j]; - xr = C16_3R * a[3][j] + C16_3I * a[13][j]; - xi = C16_3R * a[13][j] - C16_3I * a[3][j]; - x6r = x7r + xr; - x6i = x7i - xi; - x7r -= xr; - x7i += xi; - xr = x4r - x6r; - xi = x4i - x6i; - x4r += x6r; - x4i += x6i; - x6r = W16_8R * (xi + xr); - x6i = W16_8R * (xi - xr); - xr = x5r + x7i; - xi = x5i - x7r; - x5r -= x7i; - x5i += x7r; - x7r = W16_4I * x5r + W16_4R * x5i; - x7i = W16_4I * x5i - W16_4R * x5r; - x5r = W16_4R * xr + W16_4I * xi; - x5i = W16_4R * xi - W16_4I * xr; - xr = C16_4R * a[4][j] + C16_4I * a[12][j]; - xi = C16_4R * a[12][j] - C16_4I * a[4][j]; - x2r = C16_8R * (a[0][j] + a[8][j]); - x3r = C16_8R * (a[0][j] - a[8][j]); - x0r = x2r + xr; - x1r = x3r + xi; - x2r -= xr; - x3r -= xi; - x0i = C16_2R * a[2][j] + C16_2I * a[14][j]; - x2i = C16_2R * a[14][j] - C16_2I * a[2][j]; - x1i = C16_6R * a[6][j] + C16_6I * a[10][j]; - x3i = C16_6R * a[10][j] - C16_6I * a[6][j]; - xr = x0i - x1i; - xi = x2i + x3i; - x0i += x1i; - x2i -= x3i; - x1i = W16_8R * (xi + xr); - x3i = W16_8R * (xi - xr); - xr = x0r + x0i; - xi = x0r - x0i; - a[0][j] = xr + x4r; - a[15][j] = xr - x4r; - a[8][j] = xi + x4i; - a[7][j] = xi - x4i; - xr = x1r + x1i; - xi = x1r - x1i; - a[2][j] = xr + x5r; - a[13][j] = xr - x5r; - a[10][j] = xi + x5i; - a[5][j] = xi - x5i; - xr = x2r + x2i; - xi = x2r - x2i; - a[4][j] = xr + x6r; - a[11][j] = xr - x6r; - a[12][j] = xi + x6i; - a[3][j] = xi - x6i; - xr = x3r + x3i; - xi = x3r - x3i; - a[6][j] = xr + x7r; - a[9][j] = xr - x7r; - a[14][j] = xi + x7i; - a[1][j] = xi - x7i; - } - for (j = 0; j <= 15; j++) { - x5r = C16_1R * a[j][1] + C16_1I * a[j][15]; - x5i = C16_1R * a[j][15] - C16_1I * a[j][1]; - xr = C16_7R * a[j][7] + C16_7I * a[j][9]; - xi = C16_7R * a[j][9] - C16_7I * a[j][7]; - x4r = x5r + xr; - x4i = x5i - xi; - x5r -= xr; - x5i += xi; - x7r = C16_5R * a[j][5] + C16_5I * a[j][11]; - x7i = C16_5R * a[j][11] - C16_5I * a[j][5]; - xr = C16_3R * a[j][3] + C16_3I * a[j][13]; - xi = C16_3R * a[j][13] - C16_3I * a[j][3]; - x6r = x7r + xr; - x6i = x7i - xi; - x7r -= xr; - x7i += xi; - xr = x4r - x6r; - xi = x4i - x6i; - x4r += x6r; - x4i += x6i; - x6r = W16_8R * (xi + xr); - x6i = W16_8R * (xi - xr); - xr = x5r + x7i; - xi = x5i - x7r; - x5r -= x7i; - x5i += x7r; - x7r = W16_4I * x5r + W16_4R * x5i; - x7i = W16_4I * x5i - W16_4R * x5r; - x5r = W16_4R * xr + W16_4I * xi; - x5i = W16_4R * xi - W16_4I * xr; - xr = C16_4R * a[j][4] + C16_4I * a[j][12]; - xi = C16_4R * a[j][12] - C16_4I * a[j][4]; - x2r = C16_8R * (a[j][0] + a[j][8]); - x3r = C16_8R * (a[j][0] - a[j][8]); - x0r = x2r + xr; - x1r = x3r + xi; - x2r -= xr; - x3r -= xi; - x0i = C16_2R * a[j][2] + C16_2I * a[j][14]; - x2i = C16_2R * a[j][14] - C16_2I * a[j][2]; - x1i = C16_6R * a[j][6] + C16_6I * a[j][10]; - x3i = C16_6R * a[j][10] - C16_6I * a[j][6]; - xr = x0i - x1i; - xi = x2i + x3i; - x0i += x1i; - x2i -= x3i; - x1i = W16_8R * (xi + xr); - x3i = W16_8R * (xi - xr); - xr = x0r + x0i; - xi = x0r - x0i; - a[j][0] = xr + x4r; - a[j][15] = xr - x4r; - a[j][8] = xi + x4i; - a[j][7] = xi - x4i; - xr = x1r + x1i; - xi = x1r - x1i; - a[j][2] = xr + x5r; - a[j][13] = xr - x5r; - a[j][10] = xi + x5i; - a[j][5] = xi - x5i; - xr = x2r + x2i; - xi = x2r - x2i; - a[j][4] = xr + x6r; - a[j][11] = xr - x6r; - a[j][12] = xi + x6i; - a[j][3] = xi - x6i; - xr = x3r + x3i; - xi = x3r - x3i; - a[j][6] = xr + x7r; - a[j][9] = xr - x7r; - a[j][14] = xi + x7i; - a[j][1] = xi - x7i; - } - } -} -
diff --git a/third_party/tensorflow_dependencies/flatbuffers b/third_party/tensorflow_dependencies/flatbuffers deleted file mode 160000 index 6d0aae7..0000000 --- a/third_party/tensorflow_dependencies/flatbuffers +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit 6d0aae73cd2ad3f8ee93c952af6b33e99049ad9c
diff --git a/third_party/tensorflow_dependencies/fp16 b/third_party/tensorflow_dependencies/fp16 deleted file mode 160000 index 4dfe081..0000000 --- a/third_party/tensorflow_dependencies/fp16 +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit 4dfe081cf6bcd15db339cf2680b9281b8451eeb3
diff --git a/third_party/tensorflow_dependencies/gemmlowp b/third_party/tensorflow_dependencies/gemmlowp deleted file mode 160000 index fda83bd..0000000 --- a/third_party/tensorflow_dependencies/gemmlowp +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit fda83bdc38b118cc6b56753bd540caa49e570745
diff --git a/third_party/tensorflow_dependencies/neon_2_sse b/third_party/tensorflow_dependencies/neon_2_sse deleted file mode 160000 index 42b2beb..0000000 --- a/third_party/tensorflow_dependencies/neon_2_sse +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit 42b2bebacee25452e150095ef4480b3fa26e30f5
diff --git a/third_party/tensorflow_dependencies/ruy b/third_party/tensorflow_dependencies/ruy deleted file mode 160000 index bebf022..0000000 --- a/third_party/tensorflow_dependencies/ruy +++ /dev/null
@@ -1 +0,0 @@ -Subproject commit bebf022784e9b22277b84373c9877aebff8411a7