diff options
author | Wim <wim@42.be> | 2022-01-31 00:27:37 +0100 |
---|---|---|
committer | Wim <wim@42.be> | 2022-03-20 14:57:48 +0100 |
commit | e3cafeaf9292f67459ff1d186f68283bfaedf2ae (patch) | |
tree | b69c39620aa91dba695b3b935c6651c0fb37ce75 /vendor/github.com/remyoudompheng/bigfft | |
parent | e7b193788a56ee7cdb02a87a9db0ad6724ef66d5 (diff) | |
download | matterbridge-msglm-e3cafeaf9292f67459ff1d186f68283bfaedf2ae.tar.gz matterbridge-msglm-e3cafeaf9292f67459ff1d186f68283bfaedf2ae.tar.bz2 matterbridge-msglm-e3cafeaf9292f67459ff1d186f68283bfaedf2ae.zip |
Add dependencies/vendor (whatsapp)
Diffstat (limited to 'vendor/github.com/remyoudompheng/bigfft')
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/LICENSE | 27 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/README | 43 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_386.s | 36 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_amd64.s | 38 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_arm.s | 36 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_arm64.s | 36 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_decl.go | 16 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s | 40 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s | 40 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s | 38 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/arith_s390x.s | 37 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/fermat.go | 216 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/fft.go | 370 | ||||
-rw-r--r-- | vendor/github.com/remyoudompheng/bigfft/scan.go | 70 |
14 files changed, 1043 insertions, 0 deletions
diff --git a/vendor/github.com/remyoudompheng/bigfft/LICENSE b/vendor/github.com/remyoudompheng/bigfft/LICENSE new file mode 100644 index 00000000..74487567 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/remyoudompheng/bigfft/README b/vendor/github.com/remyoudompheng/bigfft/README new file mode 100644 index 00000000..303c6177 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/README @@ -0,0 +1,43 @@ +Benchmarking math/big vs. bigfft + +Number size old ns/op new ns/op delta + 1kb 1599 1640 +2.56% + 10kb 61533 62170 +1.04% + 50kb 833693 831051 -0.32% +100kb 2567995 2693864 +4.90% + 1Mb 105237800 28446400 -72.97% + 5Mb 1272947000 168554600 -86.76% + 10Mb 3834354000 405120200 -89.43% + 20Mb 11514488000 845081600 -92.66% + 50Mb 49199945000 2893950000 -94.12% +100Mb 147599836000 5921594000 -95.99% + +Benchmarking GMP vs bigfft + +Number size GMP ns/op Go ns/op delta + 1kb 536 1500 +179.85% + 10kb 26669 50777 +90.40% + 50kb 252270 658534 +161.04% +100kb 686813 2127534 +209.77% + 1Mb 12100000 22391830 +85.06% + 5Mb 111731843 133550600 +19.53% + 10Mb 212314000 318595800 +50.06% + 20Mb 490196000 671512800 +36.99% + 50Mb 1280000000 2451476000 +91.52% +100Mb 2673000000 5228991000 +95.62% + +Benchmarks were run on a Core 2 Quad Q8200 (2.33GHz). +FFT is enabled when input numbers are over 200kbits. + +Scanning large decimal number from strings. +(math/big [n^2 complexity] vs bigfft [n^1.6 complexity], Core i5-4590) + +Digits old ns/op new ns/op delta +1e3 9995 10876 +8.81% +1e4 175356 243806 +39.03% +1e5 9427422 6780545 -28.08% +1e6 1776707489 144867502 -91.85% +2e6 6865499995 346540778 -94.95% +5e6 42641034189 1069878799 -97.49% +10e6 151975273589 2693328580 -98.23% + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_386.s b/vendor/github.com/remyoudompheng/bigfft/arith_386.s new file mode 100644 index 00000000..cc50a017 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_386.s @@ -0,0 +1,36 @@ +// Trampolines to math/big assembly implementations. + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + JMP math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + JMP math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + JMP math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + JMP math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_amd64.s b/vendor/github.com/remyoudompheng/bigfft/arith_amd64.s new file mode 100644 index 00000000..0b79335f --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_amd64.s @@ -0,0 +1,38 @@ +// Trampolines to math/big assembly implementations. + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + JMP math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + JMP math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + JMP math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + JMP math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_arm.s b/vendor/github.com/remyoudompheng/bigfft/arith_arm.s new file mode 100644 index 00000000..0ed60f5c --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_arm.s @@ -0,0 +1,36 @@ +// Trampolines to math/big assembly implementations. + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + B math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + B math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + B math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + B math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + B math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + B math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + B math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + B math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_arm64.s b/vendor/github.com/remyoudompheng/bigfft/arith_arm64.s new file mode 100644 index 00000000..0ed60f5c --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_arm64.s @@ -0,0 +1,36 @@ +// Trampolines to math/big assembly implementations. + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + B math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + B math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + B math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + B math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + B math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + B math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + B math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + B math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_decl.go b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go new file mode 100644 index 00000000..7659b019 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_decl.go @@ -0,0 +1,16 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bigfft + +import . "math/big" + +// implemented in arith_$GOARCH.s +func addVV(z, x, y []Word) (c Word) +func subVV(z, x, y []Word) (c Word) +func addVW(z, x []Word, y Word) (c Word) +func subVW(z, x []Word, y Word) (c Word) +func shlVU(z, x []Word, s uint) (c Word) +func mulAddVWW(z, x []Word, y, r Word) (c Word) +func addMulVVW(z, x []Word, y Word) (c Word) diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s b/vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s new file mode 100644 index 00000000..82443882 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s @@ -0,0 +1,40 @@ +// Trampolines to math/big assembly implementations. + +// +build mips64 mips64le + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + JMP math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + JMP math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + JMP math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + JMP math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s b/vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s new file mode 100644 index 00000000..6c0e92e5 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s @@ -0,0 +1,40 @@ +// Trampolines to math/big assembly implementations. + +// +build mips mipsle + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + JMP math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + JMP math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + JMP math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + JMP math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s b/vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s new file mode 100644 index 00000000..16c7f153 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s @@ -0,0 +1,38 @@ +// Trampolines to math/big assembly implementations. + +// +build ppc64 ppc64le + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + BR math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + BR math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + BR math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + BR math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + BR math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + BR math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + BR math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + BR math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/arith_s390x.s b/vendor/github.com/remyoudompheng/bigfft/arith_s390x.s new file mode 100644 index 00000000..f72ab053 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/arith_s390x.s @@ -0,0 +1,37 @@ + +// Trampolines to math/big assembly implementations. + +#include "textflag.h" + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + BR math∕big·addVV(SB) + +// func subVV(z, x, y []Word) (c Word) +TEXT ·subVV(SB),NOSPLIT,$0 + BR math∕big·subVV(SB) + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + BR math∕big·addVW(SB) + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + BR math∕big·subVW(SB) + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + BR math∕big·shlVU(SB) + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + BR math∕big·shrVU(SB) + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + BR math∕big·mulAddVWW(SB) + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + BR math∕big·addMulVVW(SB) + diff --git a/vendor/github.com/remyoudompheng/bigfft/fermat.go b/vendor/github.com/remyoudompheng/bigfft/fermat.go new file mode 100644 index 00000000..200ee573 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/fermat.go @@ -0,0 +1,216 @@ +package bigfft + +import ( + "math/big" +) + +// Arithmetic modulo 2^n+1. + +// A fermat of length w+1 represents a number modulo 2^(w*_W) + 1. The last +// word is zero or one. A number has at most two representatives satisfying the +// 0-1 last word constraint. +type fermat nat + +func (n fermat) String() string { return nat(n).String() } + +func (z fermat) norm() { + n := len(z) - 1 + c := z[n] + if c == 0 { + return + } + if z[0] >= c { + z[n] = 0 + z[0] -= c + return + } + // z[0] < z[n]. + subVW(z, z, c) // Substract c + if c > 1 { + z[n] -= c - 1 + c = 1 + } + // Add back c. + if z[n] == 1 { + z[n] = 0 + return + } else { + addVW(z, z, 1) + } +} + +// Shift computes (x << k) mod (2^n+1). +func (z fermat) Shift(x fermat, k int) { + if len(z) != len(x) { + panic("len(z) != len(x) in Shift") + } + n := len(x) - 1 + // Shift by n*_W is taking the opposite. + k %= 2 * n * _W + if k < 0 { + k += 2 * n * _W + } + neg := false + if k >= n*_W { + k -= n * _W + neg = true + } + + kw, kb := k/_W, k%_W + + z[n] = 1 // Add (-1) + if !neg { + for i := 0; i < kw; i++ { + z[i] = 0 + } + // Shift left by kw words. + // x = a·2^(n-k) + b + // x<<k = (b<<k) - a + copy(z[kw:], x[:n-kw]) + b := subVV(z[:kw+1], z[:kw+1], x[n-kw:]) + if z[kw+1] > 0 { + z[kw+1] -= b + } else { + subVW(z[kw+1:], z[kw+1:], b) + } + } else { + for i := kw + 1; i < n; i++ { + z[i] = 0 + } + // Shift left and negate, by kw words. + copy(z[:kw+1], x[n-kw:n+1]) // z_low = x_high + b := subVV(z[kw:n], z[kw:n], x[:n-kw]) // z_high -= x_low + z[n] -= b + } + // Add back 1. + if z[n] > 0 { + z[n]-- + } else if z[0] < ^big.Word(0) { + z[0]++ + } else { + addVW(z, z, 1) + } + // Shift left by kb bits + shlVU(z, z, uint(kb)) + z.norm() +} + +// ShiftHalf shifts x by k/2 bits the left. Shifting by 1/2 bit +// is multiplication by sqrt(2) mod 2^n+1 which is 2^(3n/4) - 2^(n/4). +// A temporary buffer must be provided in tmp. +func (z fermat) ShiftHalf(x fermat, k int, tmp fermat) { + n := len(z) - 1 + if k%2 == 0 { + z.Shift(x, k/2) + return + } + u := (k - 1) / 2 + a := u + (3*_W/4)*n + b := u + (_W/4)*n + z.Shift(x, a) + tmp.Shift(x, b) + z.Sub(z, tmp) +} + +// Add computes addition mod 2^n+1. +func (z fermat) Add(x, y fermat) fermat { + if len(z) != len(x) { + panic("Add: len(z) != len(x)") + } + addVV(z, x, y) // there cannot be a carry here. + z.norm() + return z +} + +// Sub computes substraction mod 2^n+1. +func (z fermat) Sub(x, y fermat) fermat { + if len(z) != len(x) { + panic("Add: len(z) != len(x)") + } + n := len(y) - 1 + b := subVV(z[:n], x[:n], y[:n]) + b += y[n] + // If b > 0, we need to subtract b<<n, which is the same as adding b. + z[n] = x[n] + if z[0] <= ^big.Word(0)-b { + z[0] += b + } else { + addVW(z, z, b) + } + z.norm() + return z +} + +func (z fermat) Mul(x, y fermat) fermat { + if len(x) != len(y) { + panic("Mul: len(x) != len(y)") + } + n := len(x) - 1 + if n < 30 { + z = z[:2*n+2] + basicMul(z, x, y) + z = z[:2*n+1] + } else { + var xi, yi, zi big.Int + xi.SetBits(x) + yi.SetBits(y) + zi.SetBits(z) + zb := zi.Mul(&xi, &yi).Bits() + if len(zb) <= n { + // Short product. + copy(z, zb) + for i := len(zb); i < len(z); i++ { + z[i] = 0 + } + return z + } + z = zb + } + // len(z) is at most 2n+1. + if len(z) > 2*n+1 { + panic("len(z) > 2n+1") + } + // We now have + // z = z[:n] + 1<<(n*W) * z[n:2n+1] + // which normalizes to: + // z = z[:n] - z[n:2n] + z[2n] + c1 := big.Word(0) + if len(z) > 2*n { + c1 = addVW(z[:n], z[:n], z[2*n]) + } + c2 := big.Word(0) + if len(z) >= 2*n { + c2 = subVV(z[:n], z[:n], z[n:2*n]) + } else { + m := len(z) - n + c2 = subVV(z[:m], z[:m], z[n:]) + c2 = subVW(z[m:n], z[m:n], c2) + } + // Restore carries. + // Substracting z[n] -= c2 is the same + // as z[0] += c2 + z = z[:n+1] + z[n] = c1 + c := addVW(z, z, c2) + if c != 0 { + panic("impossible") + } + z.norm() + return z +} + +// copied from math/big +// +// basicMul multiplies x and y and leaves the result in z. +// The (non-normalized) result is placed in z[0 : len(x) + len(y)]. +func basicMul(z, x, y fermat) { + // initialize z + for i := 0; i < len(z); i++ { + z[i] = 0 + } + for i, d := range y { + if d != 0 { + z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) + } + } +} diff --git a/vendor/github.com/remyoudompheng/bigfft/fft.go b/vendor/github.com/remyoudompheng/bigfft/fft.go new file mode 100644 index 00000000..2d4c1e7a --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/fft.go @@ -0,0 +1,370 @@ +// Package bigfft implements multiplication of big.Int using FFT. +// +// The implementation is based on the Schönhage-Strassen method +// using integer FFT modulo 2^n+1. +package bigfft + +import ( + "math/big" + "unsafe" +) + +const _W = int(unsafe.Sizeof(big.Word(0)) * 8) + +type nat []big.Word + +func (n nat) String() string { + v := new(big.Int) + v.SetBits(n) + return v.String() +} + +// fftThreshold is the size (in words) above which FFT is used over +// Karatsuba from math/big. +// +// TestCalibrate seems to indicate a threshold of 60kbits on 32-bit +// arches and 110kbits on 64-bit arches. +var fftThreshold = 1800 + +// Mul computes the product x*y and returns z. +// It can be used instead of the Mul method of +// *big.Int from math/big package. +func Mul(x, y *big.Int) *big.Int { + xwords := len(x.Bits()) + ywords := len(y.Bits()) + if xwords > fftThreshold && ywords > fftThreshold { + return mulFFT(x, y) + } + return new(big.Int).Mul(x, y) +} + +func mulFFT(x, y *big.Int) *big.Int { + var xb, yb nat = x.Bits(), y.Bits() + zb := fftmul(xb, yb) + z := new(big.Int) + z.SetBits(zb) + if x.Sign()*y.Sign() < 0 { + z.Neg(z) + } + return z +} + +// A FFT size of K=1<<k is adequate when K is about 2*sqrt(N) where +// N = x.Bitlen() + y.Bitlen(). + +func fftmul(x, y nat) nat { + k, m := fftSize(x, y) + xp := polyFromNat(x, k, m) + yp := polyFromNat(y, k, m) + rp := xp.Mul(&yp) + return rp.Int() +} + +// fftSizeThreshold[i] is the maximal size (in bits) where we should use +// fft size i. +var fftSizeThreshold = [...]int64{0, 0, 0, + 4 << 10, 8 << 10, 16 << 10, // 5 + 32 << 10, 64 << 10, 1 << 18, 1 << 20, 3 << 20, // 10 + 8 << 20, 30 << 20, 100 << 20, 300 << 20, 600 << 20, +} + +// returns the FFT length k, m the number of words per chunk +// such that m << k is larger than the number of words +// in x*y. +func fftSize(x, y nat) (k uint, m int) { + words := len(x) + len(y) + bits := int64(words) * int64(_W) + k = uint(len(fftSizeThreshold)) + for i := range fftSizeThreshold { + if fftSizeThreshold[i] > bits { + k = uint(i) + break + } + } + // The 1<<k chunks of m words must have N bits so that + // 2^N-1 is larger than x*y. That is, m<<k > words + m = words>>k + 1 + return +} + +// valueSize returns the length (in words) to use for polynomial +// coefficients, to compute a correct product of polynomials P*Q +// where deg(P*Q) < K (== 1<<k) and where coefficients of P and Q are +// less than b^m (== 1 << (m*_W)). +// The chosen length (in bits) must be a multiple of 1 << (k-extra). +func valueSize(k uint, m int, extra uint) int { + // The coefficients of P*Q are less than b^(2m)*K + // so we need W * valueSize >= 2*m*W+K + n := 2*m*_W + int(k) // necessary bits + K := 1 << (k - extra) + if K < _W { + K = _W + } + n = ((n / K) + 1) * K // round to a multiple of K + return n / _W +} + +// poly represents an integer via a polynomial in Z[x]/(x^K+1) +// where K is the FFT length and b^m is the computation basis 1<<(m*_W). +// If P = a[0] + a[1] x + ... a[n] x^(K-1), the associated natural number +// is P(b^m). +type poly struct { + k uint // k is such that K = 1<<k. + m int // the m such that P(b^m) is the original number. + a []nat // a slice of at most K m-word coefficients. +} + +// polyFromNat slices the number x into a polynomial +// with 1<<k coefficients made of m words. +func polyFromNat(x nat, k uint, m int) poly { + p := poly{k: k, m: m} + length := len(x)/m + 1 + p.a = make([]nat, length) + for i := range p.a { + if len(x) < m { + p.a[i] = make(nat, m) + copy(p.a[i], x) + break + } + p.a[i] = x[:m] + x = x[m:] + } + return p +} + +// Int evaluates back a poly to its integer value. +func (p *poly) Int() nat { + length := len(p.a)*p.m + 1 + if na := len(p.a); na > 0 { + length += len(p.a[na-1]) + } + n := make(nat, length) + m := p.m + np := n + for i := range p.a { + l := len(p.a[i]) + c := addVV(np[:l], np[:l], p.a[i]) + if np[l] < ^big.Word(0) { + np[l] += c + } else { + addVW(np[l:], np[l:], c) + } + np = np[m:] + } + n = trim(n) + return n +} + +func trim(n nat) nat { + for i := range n { + if n[len(n)-1-i] != 0 { + return n[:len(n)-i] + } + } + return nil +} + +// Mul multiplies p and q modulo X^K-1, where K = 1<<p.k. +// The product is done via a Fourier transform. +func (p *poly) Mul(q *poly) poly { + // extra=2 because: + // * some power of 2 is a K-th root of unity when n is a multiple of K/2. + // * 2 itself is a square (see fermat.ShiftHalf) + n := valueSize(p.k, p.m, 2) + + pv, qv := p.Transform(n), q.Transform(n) + rv := pv.Mul(&qv) + r := rv.InvTransform() + r.m = p.m + return r +} + +// A polValues represents the value of a poly at the powers of a +// K-th root of unity θ=2^(l/2) in Z/(b^n+1)Z, where b^n = 2^(K/4*l). +type polValues struct { + k uint // k is such that K = 1<<k. + n int // the length of coefficients, n*_W a multiple of K/4. + values []fermat // a slice of K (n+1)-word values +} + +// Transform evaluates p at θ^i for i = 0...K-1, where +// θ is a K-th primitive root of unity in Z/(b^n+1)Z. +func (p *poly) Transform(n int) polValues { + k := p.k + inputbits := make([]big.Word, (n+1)<<k) + input := make([]fermat, 1<<k) + // Now computed q(ω^i) for i = 0 ... K-1 + valbits := make([]big.Word, (n+1)<<k) + values := make([]fermat, 1<<k) + for i := range values { + input[i] = inputbits[i*(n+1) : (i+1)*(n+1)] + if i < len(p.a) { + copy(input[i], p.a[i]) + } + values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)]) + } + fourier(values, input, false, n, k) + return polValues{k, n, values} +} + +// InvTransform reconstructs p (modulo X^K - 1) from its +// values at θ^i for i = 0..K-1. +func (v *polValues) InvTransform() poly { + k, n := v.k, v.n + + // Perform an inverse Fourier transform to recover p. + pbits := make([]big.Word, (n+1)<<k) + p := make([]fermat, 1<<k) + for i := range p { + p[i] = fermat(pbits[i*(n+1) : (i+1)*(n+1)]) + } + fourier(p, v.values, true, n, k) + // Divide by K, and untwist q to recover p. + u := make(fermat, n+1) + a := make([]nat, 1<<k) + for i := range p { + u.Shift(p[i], -int(k)) + copy(p[i], u) + a[i] = nat(p[i]) + } + return poly{k: k, m: 0, a: a} +} + +// NTransform evaluates p at θω^i for i = 0...K-1, where +// θ is a (2K)-th primitive root of unity in Z/(b^n+1)Z +// and ω = θ². +func (p *poly) NTransform(n int) polValues { + k := p.k + if len(p.a) >= 1<<k { + panic("Transform: len(p.a) >= 1<<k") + } + // θ is represented as a shift. + θshift := (n * _W) >> k + // p(x) = a_0 + a_1 x + ... + a_{K-1} x^(K-1) + // p(θx) = q(x) where + // q(x) = a_0 + θa_1 x + ... + θ^(K-1) a_{K-1} x^(K-1) + // + // Twist p by θ to obtain q. + tbits := make([]big.Word, (n+1)<<k) + twisted := make([]fermat, 1<<k) + src := make(fermat, n+1) + for i := range twisted { + twisted[i] = fermat(tbits[i*(n+1) : (i+1)*(n+1)]) + if i < len(p.a) { + for i := range src { + src[i] = 0 + } + copy(src, p.a[i]) + twisted[i].Shift(src, θshift*i) + } + } + + // Now computed q(ω^i) for i = 0 ... K-1 + valbits := make([]big.Word, (n+1)<<k) + values := make([]fermat, 1<<k) + for i := range values { + values[i] = fermat(valbits[i*(n+1) : (i+1)*(n+1)]) + } + fourier(values, twisted, false, n, k) + return polValues{k, n, values} +} + +// InvTransform reconstructs a polynomial from its values at +// roots of x^K+1. The m field of the returned polynomial +// is unspecified. +func (v *polValues) InvNTransform() poly { + k := v.k + n := v.n + θshift := (n * _W) >> k + + // Perform an inverse Fourier transform to recover q. + qbits := make([]big.Word, (n+1)<<k) + q := make([]fermat, 1<<k) + for i := range q { + q[i] = fermat(qbits[i*(n+1) : (i+1)*(n+1)]) + } + fourier(q, v.values, true, n, k) + + // Divide by K, and untwist q to recover p. + u := make(fermat, n+1) + a := make([]nat, 1<<k) + for i := range q { + u.Shift(q[i], -int(k)-i*θshift) + copy(q[i], u) + a[i] = nat(q[i]) + } + return poly{k: k, m: 0, a: a} +} + +// fourier performs an unnormalized Fourier transform +// of src, a length 1<<k vector of numbers modulo b^n+1 +// where b = 1<<_W. +func fourier(dst []fermat, src []fermat, backward bool, n int, k uint) { + var rec func(dst, src []fermat, size uint) + tmp := make(fermat, n+1) // pre-allocate temporary variables. + tmp2 := make(fermat, n+1) // pre-allocate temporary variables. + + // The recursion function of the FFT. + // The root of unity used in the transform is ω=1<<(ω2shift/2). + // The source array may use shifted indices (i.e. the i-th + // element is src[i << idxShift]). + rec = func(dst, src []fermat, size uint) { + idxShift := k - size + ω2shift := (4 * n * _W) >> size + if backward { + ω2shift = -ω2shift + } + + // Easy cases. + if len(src[0]) != n+1 || len(dst[0]) != n+1 { + panic("len(src[0]) != n+1 || len(dst[0]) != n+1") + } + switch size { + case 0: + copy(dst[0], src[0]) + return + case 1: + dst[0].Add(src[0], src[1<<idxShift]) // dst[0] = src[0] + src[1] + dst[1].Sub(src[0], src[1<<idxShift]) // dst[1] = src[0] - src[1] + return + } + + // Let P(x) = src[0] + src[1<<idxShift] * x + ... + src[K-1 << idxShift] * x^(K-1) + // The P(x) = Q1(x²) + x*Q2(x²) + // where Q1's coefficients are src with indices shifted by 1 + // where Q2's coefficients are src[1<<idxShift:] with indices shifted by 1 + + // Split destination vectors in halves. + dst1 := dst[:1<<(size-1)] + dst2 := dst[1<<(size-1):] + // Transform Q1 and Q2 in the halves. + rec(dst1, src, size-1) + rec(dst2, src[1<<idxShift:], size-1) + + // Reconstruct P's transform from transforms of Q1 and Q2. + // dst[i] is dst1[i] + ω^i * dst2[i] + // dst[i + 1<<(k-1)] is dst1[i] + ω^(i+K/2) * dst2[i] + // + for i := range dst1 { + tmp.ShiftHalf(dst2[i], i*ω2shift, tmp2) // ω^i * dst2[i] + dst2[i].Sub(dst1[i], tmp) + dst1[i].Add(dst1[i], tmp) + } + } + rec(dst, src, k) +} + +// Mul returns the pointwise product of p and q. +func (p *polValues) Mul(q *polValues) (r polValues) { + n := p.n + r.k, r.n = p.k, p.n + r.values = make([]fermat, len(p.values)) + bits := make([]big.Word, len(p.values)*(n+1)) + buf := make(fermat, 8*n) + for i := range r.values { + r.values[i] = bits[i*(n+1) : (i+1)*(n+1)] + z := buf.Mul(p.values[i], q.values[i]) + copy(r.values[i], z) + } + return +} diff --git a/vendor/github.com/remyoudompheng/bigfft/scan.go b/vendor/github.com/remyoudompheng/bigfft/scan.go new file mode 100644 index 00000000..dd3f2679 --- /dev/null +++ b/vendor/github.com/remyoudompheng/bigfft/scan.go @@ -0,0 +1,70 @@ +package bigfft + +import ( + "math/big" +) + +// FromDecimalString converts the base 10 string +// representation of a natural (non-negative) number +// into a *big.Int. +// Its asymptotic complexity is less than quadratic. +func FromDecimalString(s string) *big.Int { + var sc scanner + z := new(big.Int) + sc.scan(z, s) + return z +} + +type scanner struct { + // powers[i] is 10^(2^i * quadraticScanThreshold). + powers []*big.Int +} + +func (s *scanner) chunkSize(size int) (int, *big.Int) { + if size <= quadraticScanThreshold { + panic("size < quadraticScanThreshold") + } + pow := uint(0) + for n := size; n > quadraticScanThreshold; n /= 2 { + pow++ + } + // threshold * 2^(pow-1) <= size < threshold * 2^pow + return quadraticScanThreshold << (pow - 1), s.power(pow - 1) +} + +func (s *scanner) power(k uint) *big.Int { + for i := len(s.powers); i <= int(k); i++ { + z := new(big.Int) + if i == 0 { + if quadraticScanThreshold%14 != 0 { + panic("quadraticScanThreshold % 14 != 0") + } + z.Exp(big.NewInt(1e14), big.NewInt(quadraticScanThreshold/14), nil) + } else { + z.Mul(s.powers[i-1], s.powers[i-1]) + } + s.powers = append(s.powers, z) + } + return s.powers[k] +} + +func (s *scanner) scan(z *big.Int, str string) { + if len(str) <= quadraticScanThreshold { + z.SetString(str, 10) + return + } + sz, pow := s.chunkSize(len(str)) + // Scan the left half. + s.scan(z, str[:len(str)-sz]) + // FIXME: reuse temporaries. + left := Mul(z, pow) + // Scan the right half + s.scan(z, str[len(str)-sz:]) + z.Add(z, left) +} + +// quadraticScanThreshold is the number of digits +// below which big.Int.SetString is more efficient +// than subquadratic algorithms. +// 1232 digits fit in 4096 bits. +const quadraticScanThreshold = 1232 |