mirror of
https://github.com/taigrr/arc
synced 2025-01-18 04:33:13 -08:00
initial import
This commit is contained in:
28
vendor/github.com/klauspost/crc32/LICENSE
generated
vendored
Normal file
28
vendor/github.com/klauspost/crc32/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
Copyright (c) 2012 The Go Authors. All rights reserved.
|
||||
Copyright (c) 2015 Klaus Post
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
84
vendor/github.com/klauspost/crc32/README.md
generated
vendored
Normal file
84
vendor/github.com/klauspost/crc32/README.md
generated
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
# crc32
|
||||
CRC32 hash with x64 optimizations
|
||||
|
||||
This package is a drop-in replacement for the standard library `hash/crc32` package, that features SSE 4.2 optimizations on x64 platforms, for a 10x speedup.
|
||||
|
||||
[](https://travis-ci.org/klauspost/crc32)
|
||||
|
||||
# usage
|
||||
|
||||
Install using `go get github.com/klauspost/crc32`. This library is based on Go 1.5 code and requires Go 1.3 or newer.
|
||||
|
||||
Replace `import "hash/crc32"` with `import "github.com/klauspost/crc32"` and you are good to go.
|
||||
|
||||
# changes
|
||||
|
||||
* Dec 4, 2015: Uses the "slice-by-8" trick more extensively, which gives a 1.5 to 2.5x speedup if assembler is unavailable.
|
||||
|
||||
|
||||
# performance
|
||||
|
||||
For IEEE tables (the most common), there is approximately a factor 10 speedup with "CLMUL" (Carryless multiplication) instruction:
|
||||
```
|
||||
benchmark old ns/op new ns/op delta
|
||||
BenchmarkCrc32KB 99955 10258 -89.74%
|
||||
|
||||
benchmark old MB/s new MB/s speedup
|
||||
BenchmarkCrc32KB 327.83 3194.20 9.74x
|
||||
```
|
||||
|
||||
For other tables and "CLMUL" capable machines the performance is the same as the standard library.
|
||||
|
||||
Here are some detailed benchmarks, comparing to go 1.5 standard library with and without assembler enabled.
|
||||
|
||||
```
|
||||
Std: Standard Go 1.5 library
|
||||
Crc: Indicates IEEE type CRC.
|
||||
40B: Size of each slice encoded.
|
||||
NoAsm: Assembler was disabled (ie. not an AMD64 or SSE 4.2+ capable machine).
|
||||
Castagnoli: Castagnoli CRC type.
|
||||
|
||||
BenchmarkStdCrc40B-4 10000000 158 ns/op 252.88 MB/s
|
||||
BenchmarkCrc40BNoAsm-4 20000000 105 ns/op 377.38 MB/s (slice8)
|
||||
BenchmarkCrc40B-4 20000000 105 ns/op 378.77 MB/s (slice8)
|
||||
|
||||
BenchmarkStdCrc1KB-4 500000 3604 ns/op 284.10 MB/s
|
||||
BenchmarkCrc1KBNoAsm-4 1000000 1463 ns/op 699.79 MB/s (slice8)
|
||||
BenchmarkCrc1KB-4 3000000 396 ns/op 2583.69 MB/s (asm)
|
||||
|
||||
BenchmarkStdCrc8KB-4 200000 11417 ns/op 717.48 MB/s (slice8)
|
||||
BenchmarkCrc8KBNoAsm-4 200000 11317 ns/op 723.85 MB/s (slice8)
|
||||
BenchmarkCrc8KB-4 500000 2919 ns/op 2805.73 MB/s (asm)
|
||||
|
||||
BenchmarkStdCrc32KB-4 30000 45749 ns/op 716.24 MB/s (slice8)
|
||||
BenchmarkCrc32KBNoAsm-4 30000 45109 ns/op 726.42 MB/s (slice8)
|
||||
BenchmarkCrc32KB-4 100000 11497 ns/op 2850.09 MB/s (asm)
|
||||
|
||||
BenchmarkStdNoAsmCastagnol40B-4 10000000 161 ns/op 246.94 MB/s
|
||||
BenchmarkStdCastagnoli40B-4 50000000 28.4 ns/op 1410.69 MB/s (asm)
|
||||
BenchmarkCastagnoli40BNoAsm-4 20000000 100 ns/op 398.01 MB/s (slice8)
|
||||
BenchmarkCastagnoli40B-4 50000000 28.2 ns/op 1419.54 MB/s (asm)
|
||||
|
||||
BenchmarkStdNoAsmCastagnoli1KB-4 500000 3622 ns/op 282.67 MB/s
|
||||
BenchmarkStdCastagnoli1KB-4 10000000 144 ns/op 7099.78 MB/s (asm)
|
||||
BenchmarkCastagnoli1KBNoAsm-4 1000000 1475 ns/op 694.14 MB/s (slice8)
|
||||
BenchmarkCastagnoli1KB-4 10000000 146 ns/op 6993.35 MB/s (asm)
|
||||
|
||||
BenchmarkStdNoAsmCastagnoli8KB-4 50000 28781 ns/op 284.63 MB/s
|
||||
BenchmarkStdCastagnoli8KB-4 1000000 1029 ns/op 7957.89 MB/s (asm)
|
||||
BenchmarkCastagnoli8KBNoAsm-4 200000 11410 ns/op 717.94 MB/s (slice8)
|
||||
BenchmarkCastagnoli8KB-4 1000000 1000 ns/op 8188.71 MB/s (asm)
|
||||
|
||||
BenchmarkStdNoAsmCastagnoli32KB-4 10000 115426 ns/op 283.89 MB/s
|
||||
BenchmarkStdCastagnoli32KB-4 300000 4065 ns/op 8059.13 MB/s (asm)
|
||||
BenchmarkCastagnoli32KBNoAsm-4 30000 45171 ns/op 725.41 MB/s (slice8)
|
||||
BenchmarkCastagnoli32KB-4 500000 4077 ns/op 8035.89 MB/s (asm)
|
||||
```
|
||||
|
||||
The IEEE assembler optimizations has been submitted and will be part of the Go 1.6 standard library.
|
||||
|
||||
However, the improved use of slice-by-8 has not, but will probably be submitted for Go 1.7.
|
||||
|
||||
# license
|
||||
|
||||
Standard Go license. Changes are Copyright (c) 2015 Klaus Post under same conditions.
|
||||
186
vendor/github.com/klauspost/crc32/crc32.go
generated
vendored
Normal file
186
vendor/github.com/klauspost/crc32/crc32.go
generated
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package crc32 implements the 32-bit cyclic redundancy check, or CRC-32,
|
||||
// checksum. See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for
|
||||
// information.
|
||||
//
|
||||
// Polynomials are represented in LSB-first form also known as reversed representation.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks#Reversed_representations_and_reciprocal_polynomials
|
||||
// for information.
|
||||
package crc32
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// The size of a CRC-32 checksum in bytes.
|
||||
const Size = 4
|
||||
|
||||
// Predefined polynomials.
|
||||
const (
|
||||
// IEEE is by far and away the most common CRC-32 polynomial.
|
||||
// Used by ethernet (IEEE 802.3), v.42, fddi, gzip, zip, png, ...
|
||||
IEEE = 0xedb88320
|
||||
|
||||
// Castagnoli's polynomial, used in iSCSI.
|
||||
// Has better error detection characteristics than IEEE.
|
||||
// http://dx.doi.org/10.1109/26.231911
|
||||
Castagnoli = 0x82f63b78
|
||||
|
||||
// Koopman's polynomial.
|
||||
// Also has better error detection characteristics than IEEE.
|
||||
// http://dx.doi.org/10.1109/DSN.2002.1028931
|
||||
Koopman = 0xeb31d82e
|
||||
)
|
||||
|
||||
// Table is a 256-word table representing the polynomial for efficient processing.
|
||||
type Table [256]uint32
|
||||
|
||||
// castagnoliTable points to a lazily initialized Table for the Castagnoli
|
||||
// polynomial. MakeTable will always return this value when asked to make a
|
||||
// Castagnoli table so we can compare against it to find when the caller is
|
||||
// using this polynomial.
|
||||
var castagnoliTable *Table
|
||||
var castagnoliTable8 *slicing8Table
|
||||
var castagnoliOnce sync.Once
|
||||
|
||||
func castagnoliInit() {
|
||||
castagnoliTable = makeTable(Castagnoli)
|
||||
castagnoliTable8 = makeTable8(Castagnoli)
|
||||
}
|
||||
|
||||
// IEEETable is the table for the IEEE polynomial.
|
||||
var IEEETable = makeTable(IEEE)
|
||||
|
||||
// slicing8Table is array of 8 Tables
|
||||
type slicing8Table [8]Table
|
||||
|
||||
// ieeeTable8 is the slicing8Table for IEEE
|
||||
var ieeeTable8 *slicing8Table
|
||||
var ieeeTable8Once sync.Once
|
||||
|
||||
// MakeTable returns a Table constructed from the specified polynomial.
|
||||
// The contents of this Table must not be modified.
|
||||
func MakeTable(poly uint32) *Table {
|
||||
switch poly {
|
||||
case IEEE:
|
||||
return IEEETable
|
||||
case Castagnoli:
|
||||
castagnoliOnce.Do(castagnoliInit)
|
||||
return castagnoliTable
|
||||
}
|
||||
return makeTable(poly)
|
||||
}
|
||||
|
||||
// makeTable returns the Table constructed from the specified polynomial.
|
||||
func makeTable(poly uint32) *Table {
|
||||
t := new(Table)
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := uint32(i)
|
||||
for j := 0; j < 8; j++ {
|
||||
if crc&1 == 1 {
|
||||
crc = (crc >> 1) ^ poly
|
||||
} else {
|
||||
crc >>= 1
|
||||
}
|
||||
}
|
||||
t[i] = crc
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// makeTable8 returns slicing8Table constructed from the specified polynomial.
|
||||
func makeTable8(poly uint32) *slicing8Table {
|
||||
t := new(slicing8Table)
|
||||
t[0] = *makeTable(poly)
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := t[0][i]
|
||||
for j := 1; j < 8; j++ {
|
||||
crc = t[0][crc&0xFF] ^ (crc >> 8)
|
||||
t[j][i] = crc
|
||||
}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// digest represents the partial evaluation of a checksum.
|
||||
type digest struct {
|
||||
crc uint32
|
||||
tab *Table
|
||||
}
|
||||
|
||||
// New creates a new hash.Hash32 computing the CRC-32 checksum
|
||||
// using the polynomial represented by the Table.
|
||||
// Its Sum method will lay the value out in big-endian byte order.
|
||||
func New(tab *Table) hash.Hash32 { return &digest{0, tab} }
|
||||
|
||||
// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
|
||||
// using the IEEE polynomial.
|
||||
// Its Sum method will lay the value out in big-endian byte order.
|
||||
func NewIEEE() hash.Hash32 { return New(IEEETable) }
|
||||
|
||||
func (d *digest) Size() int { return Size }
|
||||
|
||||
func (d *digest) BlockSize() int { return 1 }
|
||||
|
||||
func (d *digest) Reset() { d.crc = 0 }
|
||||
|
||||
func update(crc uint32, tab *Table, p []byte) uint32 {
|
||||
crc = ^crc
|
||||
for _, v := range p {
|
||||
crc = tab[byte(crc)^v] ^ (crc >> 8)
|
||||
}
|
||||
return ^crc
|
||||
}
|
||||
|
||||
// updateSlicingBy8 updates CRC using Slicing-by-8
|
||||
func updateSlicingBy8(crc uint32, tab *slicing8Table, p []byte) uint32 {
|
||||
crc = ^crc
|
||||
for len(p) > 8 {
|
||||
crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
|
||||
crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
|
||||
tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
|
||||
tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
|
||||
p = p[8:]
|
||||
}
|
||||
crc = ^crc
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
return update(crc, &tab[0], p)
|
||||
}
|
||||
|
||||
// Update returns the result of adding the bytes in p to the crc.
|
||||
func Update(crc uint32, tab *Table, p []byte) uint32 {
|
||||
if tab == castagnoliTable {
|
||||
return updateCastagnoli(crc, p)
|
||||
}
|
||||
if tab == IEEETable {
|
||||
return updateIEEE(crc, p)
|
||||
}
|
||||
return update(crc, tab, p)
|
||||
}
|
||||
|
||||
func (d *digest) Write(p []byte) (n int, err error) {
|
||||
d.crc = Update(d.crc, d.tab, p)
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func (d *digest) Sum32() uint32 { return d.crc }
|
||||
|
||||
func (d *digest) Sum(in []byte) []byte {
|
||||
s := d.Sum32()
|
||||
return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
|
||||
}
|
||||
|
||||
// Checksum returns the CRC-32 checksum of data
|
||||
// using the polynomial represented by the Table.
|
||||
func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
|
||||
|
||||
// ChecksumIEEE returns the CRC-32 checksum of data
|
||||
// using the IEEE polynomial.
|
||||
func ChecksumIEEE(data []byte) uint32 { return updateIEEE(0, data) }
|
||||
62
vendor/github.com/klauspost/crc32/crc32_amd64.go
generated
vendored
Normal file
62
vendor/github.com/klauspost/crc32/crc32_amd64.go
generated
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine,!gccgo
|
||||
|
||||
package crc32
|
||||
|
||||
// This file contains the code to call the SSE 4.2 version of the Castagnoli
|
||||
// and IEEE CRC.
|
||||
|
||||
// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use
|
||||
// CPUID to test for SSE 4.1, 4.2 and CLMUL support.
|
||||
func haveSSE41() bool
|
||||
func haveSSE42() bool
|
||||
func haveCLMUL() bool
|
||||
|
||||
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
|
||||
// instruction.
|
||||
//go:noescape
|
||||
func castagnoliSSE42(crc uint32, p []byte) uint32
|
||||
|
||||
// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ
|
||||
// instruction as well as SSE 4.1.
|
||||
//go:noescape
|
||||
func ieeeCLMUL(crc uint32, p []byte) uint32
|
||||
|
||||
var sse42 = haveSSE42()
|
||||
var useFastIEEE = haveCLMUL() && haveSSE41()
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if sse42 {
|
||||
return castagnoliSSE42(crc, p)
|
||||
}
|
||||
// only use slicing-by-8 when input is >= 16 Bytes
|
||||
if len(p) >= 16 {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
if useFastIEEE && len(p) >= 64 {
|
||||
left := len(p) & 15
|
||||
do := len(p) - left
|
||||
crc = ^ieeeCLMUL(^crc, p[:do])
|
||||
if left > 0 {
|
||||
crc = update(crc, IEEETable, p[do:])
|
||||
}
|
||||
return crc
|
||||
}
|
||||
|
||||
// only use slicing-by-8 when input is >= 16 Bytes
|
||||
if len(p) >= 16 {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
}
|
||||
|
||||
return update(crc, IEEETable, p)
|
||||
}
|
||||
237
vendor/github.com/klauspost/crc32/crc32_amd64.s
generated
vendored
Normal file
237
vendor/github.com/klauspost/crc32/crc32_amd64.s
generated
vendored
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build gc
|
||||
|
||||
#define NOSPLIT 4
|
||||
#define RODATA 8
|
||||
|
||||
// func castagnoliSSE42(crc uint32, p []byte) uint32
|
||||
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
|
||||
MOVL crc+0(FP), AX // CRC value
|
||||
MOVQ p+8(FP), SI // data pointer
|
||||
MOVQ p_len+16(FP), CX // len(p)
|
||||
|
||||
NOTL AX
|
||||
|
||||
// If there's less than 8 bytes to process, we do it byte-by-byte.
|
||||
CMPQ CX, $8
|
||||
JL cleanup
|
||||
|
||||
// Process individual bytes until the input is 8-byte aligned.
|
||||
startup:
|
||||
MOVQ SI, BX
|
||||
ANDQ $7, BX
|
||||
JZ aligned
|
||||
|
||||
CRC32B (SI), AX
|
||||
DECQ CX
|
||||
INCQ SI
|
||||
JMP startup
|
||||
|
||||
aligned:
|
||||
// The input is now 8-byte aligned and we can process 8-byte chunks.
|
||||
CMPQ CX, $8
|
||||
JL cleanup
|
||||
|
||||
CRC32Q (SI), AX
|
||||
ADDQ $8, SI
|
||||
SUBQ $8, CX
|
||||
JMP aligned
|
||||
|
||||
cleanup:
|
||||
// We may have some bytes left over that we process one at a time.
|
||||
CMPQ CX, $0
|
||||
JE done
|
||||
|
||||
CRC32B (SI), AX
|
||||
INCQ SI
|
||||
DECQ CX
|
||||
JMP cleanup
|
||||
|
||||
done:
|
||||
NOTL AX
|
||||
MOVL AX, ret+32(FP)
|
||||
RET
|
||||
|
||||
// func haveSSE42() bool
|
||||
TEXT ·haveSSE42(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $20, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func haveCLMUL() bool
|
||||
TEXT ·haveCLMUL(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $1, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func haveSSE41() bool
|
||||
TEXT ·haveSSE41(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $19, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
// CRC32 polynomial data
|
||||
//
|
||||
// These constants are lifted from the
|
||||
// Linux kernel, since they avoid the costly
|
||||
// PSHUFB 16 byte reversal proposed in the
|
||||
// original Intel paper.
|
||||
DATA r2r1kp<>+0(SB)/8, $0x154442bd4
|
||||
DATA r2r1kp<>+8(SB)/8, $0x1c6e41596
|
||||
DATA r4r3kp<>+0(SB)/8, $0x1751997d0
|
||||
DATA r4r3kp<>+8(SB)/8, $0x0ccaa009e
|
||||
DATA rupolykp<>+0(SB)/8, $0x1db710641
|
||||
DATA rupolykp<>+8(SB)/8, $0x1f7011641
|
||||
DATA r5kp<>+0(SB)/8, $0x163cd6124
|
||||
|
||||
GLOBL r2r1kp<>(SB), RODATA, $16
|
||||
GLOBL r4r3kp<>(SB), RODATA, $16
|
||||
GLOBL rupolykp<>(SB), RODATA, $16
|
||||
GLOBL r5kp<>(SB), RODATA, $8
|
||||
|
||||
// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
|
||||
// len(p) must be at least 64, and must be a multiple of 16.
|
||||
|
||||
// func ieeeCLMUL(crc uint32, p []byte) uint32
|
||||
TEXT ·ieeeCLMUL(SB), NOSPLIT, $0
|
||||
MOVL crc+0(FP), X0 // Initial CRC value
|
||||
MOVQ p+8(FP), SI // data pointer
|
||||
MOVQ p_len+16(FP), CX // len(p)
|
||||
|
||||
MOVOU (SI), X1
|
||||
MOVOU 16(SI), X2
|
||||
MOVOU 32(SI), X3
|
||||
MOVOU 48(SI), X4
|
||||
PXOR X0, X1
|
||||
ADDQ $64, SI // buf+=64
|
||||
SUBQ $64, CX // len-=64
|
||||
CMPQ CX, $64 // Less than 64 bytes left
|
||||
JB remain64
|
||||
|
||||
MOVOA r2r1kp<>+0(SB), X0
|
||||
|
||||
loopback64:
|
||||
MOVOA X1, X5
|
||||
MOVOA X2, X6
|
||||
MOVOA X3, X7
|
||||
MOVOA X4, X8
|
||||
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PCLMULQDQ $0, X0, X2
|
||||
PCLMULQDQ $0, X0, X3
|
||||
PCLMULQDQ $0, X0, X4
|
||||
|
||||
// Load next early
|
||||
MOVOU (SI), X11
|
||||
MOVOU 16(SI), X12
|
||||
MOVOU 32(SI), X13
|
||||
MOVOU 48(SI), X14
|
||||
|
||||
PCLMULQDQ $0x11, X0, X5
|
||||
PCLMULQDQ $0x11, X0, X6
|
||||
PCLMULQDQ $0x11, X0, X7
|
||||
PCLMULQDQ $0x11, X0, X8
|
||||
|
||||
PXOR X5, X1
|
||||
PXOR X6, X2
|
||||
PXOR X7, X3
|
||||
PXOR X8, X4
|
||||
|
||||
PXOR X11, X1
|
||||
PXOR X12, X2
|
||||
PXOR X13, X3
|
||||
PXOR X14, X4
|
||||
|
||||
ADDQ $0x40, DI
|
||||
ADDQ $64, SI // buf+=64
|
||||
SUBQ $64, CX // len-=64
|
||||
CMPQ CX, $64 // Less than 64 bytes left?
|
||||
JGE loopback64
|
||||
|
||||
// Fold result into a single register (X1)
|
||||
remain64:
|
||||
MOVOA r4r3kp<>+0(SB), X0
|
||||
|
||||
MOVOA X1, X5
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PCLMULQDQ $0x11, X0, X5
|
||||
PXOR X5, X1
|
||||
PXOR X2, X1
|
||||
|
||||
MOVOA X1, X5
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PCLMULQDQ $0x11, X0, X5
|
||||
PXOR X5, X1
|
||||
PXOR X3, X1
|
||||
|
||||
MOVOA X1, X5
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PCLMULQDQ $0x11, X0, X5
|
||||
PXOR X5, X1
|
||||
PXOR X4, X1
|
||||
|
||||
// More than 16 bytes left?
|
||||
CMPQ CX, $16
|
||||
JB finish
|
||||
|
||||
// Encode 16 bytes
|
||||
remain16:
|
||||
MOVOU (SI), X10
|
||||
MOVOA X1, X5
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PCLMULQDQ $0x11, X0, X5
|
||||
PXOR X5, X1
|
||||
PXOR X10, X1
|
||||
SUBQ $16, CX
|
||||
ADDQ $16, SI
|
||||
CMPQ CX, $16
|
||||
JGE remain16
|
||||
|
||||
finish:
|
||||
// Fold final result into 32 bits and return it
|
||||
PCMPEQB X3, X3
|
||||
PCLMULQDQ $1, X1, X0
|
||||
PSRLDQ $8, X1
|
||||
PXOR X0, X1
|
||||
|
||||
MOVOA X1, X2
|
||||
MOVQ r5kp<>+0(SB), X0
|
||||
|
||||
// Creates 32 bit mask. Note that we don't care about upper half.
|
||||
PSRLQ $32, X3
|
||||
|
||||
PSRLDQ $4, X2
|
||||
PAND X3, X1
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PXOR X2, X1
|
||||
|
||||
MOVOA rupolykp<>+0(SB), X0
|
||||
|
||||
MOVOA X1, X2
|
||||
PAND X3, X1
|
||||
PCLMULQDQ $0x10, X0, X1
|
||||
PAND X3, X1
|
||||
PCLMULQDQ $0, X0, X1
|
||||
PXOR X2, X1
|
||||
|
||||
// PEXTRD $1, X1, AX (SSE 4.1)
|
||||
BYTE $0x66; BYTE $0x0f; BYTE $0x3a
|
||||
BYTE $0x16; BYTE $0xc8; BYTE $0x01
|
||||
MOVL AX, ret+32(FP)
|
||||
|
||||
RET
|
||||
40
vendor/github.com/klauspost/crc32/crc32_amd64p32.go
generated
vendored
Normal file
40
vendor/github.com/klauspost/crc32/crc32_amd64p32.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine,!gccgo
|
||||
|
||||
package crc32
|
||||
|
||||
// This file contains the code to call the SSE 4.2 version of the Castagnoli
|
||||
// CRC.
|
||||
|
||||
// haveSSE42 is defined in crc_amd64p32.s and uses CPUID to test for SSE 4.2
|
||||
// support.
|
||||
func haveSSE42() bool
|
||||
|
||||
// castagnoliSSE42 is defined in crc_amd64.s and uses the SSE4.2 CRC32
|
||||
// instruction.
|
||||
//go:noescape
|
||||
func castagnoliSSE42(crc uint32, p []byte) uint32
|
||||
|
||||
var sse42 = haveSSE42()
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if sse42 {
|
||||
return castagnoliSSE42(crc, p)
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
// only use slicing-by-8 when input is >= 4KB
|
||||
if len(p) >= 4096 {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
}
|
||||
|
||||
return update(crc, IEEETable, p)
|
||||
}
|
||||
67
vendor/github.com/klauspost/crc32/crc32_amd64p32.s
generated
vendored
Normal file
67
vendor/github.com/klauspost/crc32/crc32_amd64p32.s
generated
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build gc
|
||||
|
||||
#define NOSPLIT 4
|
||||
#define RODATA 8
|
||||
|
||||
// func castagnoliSSE42(crc uint32, p []byte) uint32
|
||||
TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
|
||||
MOVL crc+0(FP), AX // CRC value
|
||||
MOVL p+4(FP), SI // data pointer
|
||||
MOVL p_len+8(FP), CX // len(p)
|
||||
|
||||
NOTL AX
|
||||
|
||||
// If there's less than 8 bytes to process, we do it byte-by-byte.
|
||||
CMPQ CX, $8
|
||||
JL cleanup
|
||||
|
||||
// Process individual bytes until the input is 8-byte aligned.
|
||||
startup:
|
||||
MOVQ SI, BX
|
||||
ANDQ $7, BX
|
||||
JZ aligned
|
||||
|
||||
CRC32B (SI), AX
|
||||
DECQ CX
|
||||
INCQ SI
|
||||
JMP startup
|
||||
|
||||
aligned:
|
||||
// The input is now 8-byte aligned and we can process 8-byte chunks.
|
||||
CMPQ CX, $8
|
||||
JL cleanup
|
||||
|
||||
CRC32Q (SI), AX
|
||||
ADDQ $8, SI
|
||||
SUBQ $8, CX
|
||||
JMP aligned
|
||||
|
||||
cleanup:
|
||||
// We may have some bytes left over that we process one at a time.
|
||||
CMPQ CX, $0
|
||||
JE done
|
||||
|
||||
CRC32B (SI), AX
|
||||
INCQ SI
|
||||
DECQ CX
|
||||
JMP cleanup
|
||||
|
||||
done:
|
||||
NOTL AX
|
||||
MOVL AX, ret+16(FP)
|
||||
RET
|
||||
|
||||
// func haveSSE42() bool
|
||||
TEXT ·haveSSE42(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $20, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
29
vendor/github.com/klauspost/crc32/crc32_generic.go
generated
vendored
Normal file
29
vendor/github.com/klauspost/crc32/crc32_generic.go
generated
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!amd64p32 appengine gccgo
|
||||
|
||||
package crc32
|
||||
|
||||
// This file contains the generic version of updateCastagnoli which does
|
||||
// slicing-by-8, or uses the fallback for very small sizes.
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
// only use slicing-by-8 when input is >= 16 Bytes
|
||||
if len(p) >= 16 {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
// only use slicing-by-8 when input is >= 16 Bytes
|
||||
if len(p) >= 16 {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
}
|
||||
return update(crc, IEEETable, p)
|
||||
}
|
||||
170
vendor/github.com/klauspost/crc32/crc32_test.go
generated
vendored
Normal file
170
vendor/github.com/klauspost/crc32/crc32_test.go
generated
vendored
Normal file
@@ -0,0 +1,170 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package crc32
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type test struct {
|
||||
ieee, castagnoli uint32
|
||||
in string
|
||||
}
|
||||
|
||||
var golden = []test{
|
||||
{0x0, 0x0, ""},
|
||||
{0xe8b7be43, 0xc1d04330, "a"},
|
||||
{0x9e83486d, 0xe2a22936, "ab"},
|
||||
{0x352441c2, 0x364b3fb7, "abc"},
|
||||
{0xed82cd11, 0x92c80a31, "abcd"},
|
||||
{0x8587d865, 0xc450d697, "abcde"},
|
||||
{0x4b8e39ef, 0x53bceff1, "abcdef"},
|
||||
{0x312a6aa6, 0xe627f441, "abcdefg"},
|
||||
{0xaeef2a50, 0xa9421b7, "abcdefgh"},
|
||||
{0x8da988af, 0x2ddc99fc, "abcdefghi"},
|
||||
{0x3981703a, 0xe6599437, "abcdefghij"},
|
||||
{0x6b9cdfe7, 0xb2cc01fe, "Discard medicine more than two years old."},
|
||||
{0xc90ef73f, 0xe28207f, "He who has a shady past knows that nice guys finish last."},
|
||||
{0xb902341f, 0xbe93f964, "I wouldn't marry him with a ten foot pole."},
|
||||
{0x42080e8, 0x9e3be0c3, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"},
|
||||
{0x154c6d11, 0xf505ef04, "The days of the digital watch are numbered. -Tom Stoppard"},
|
||||
{0x4c418325, 0x85d3dc82, "Nepal premier won't resign."},
|
||||
{0x33955150, 0xc5142380, "For every action there is an equal and opposite government program."},
|
||||
{0x26216a4b, 0x75eb77dd, "His money is twice tainted: 'taint yours and 'taint mine."},
|
||||
{0x1abbe45e, 0x91ebe9f7, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"},
|
||||
{0xc89a94f7, 0xf0b1168e, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"},
|
||||
{0xab3abe14, 0x572b74e2, "size: a.out: bad magic"},
|
||||
{0xbab102b6, 0x8a58a6d5, "The major problem is with sendmail. -Mark Horton"},
|
||||
{0x999149d7, 0x9c426c50, "Give me a rock, paper and scissors and I will move the world. CCFestoon"},
|
||||
{0x6d52a33c, 0x735400a4, "If the enemy is within range, then so are you."},
|
||||
{0x90631e8d, 0xbec49c95, "It's well we cannot hear the screams/That we create in others' dreams."},
|
||||
{0x78309130, 0xa95a2079, "You remind me of a TV show, but that's all right: I watch it anyway."},
|
||||
{0x7d0a377f, 0xde2e65c5, "C is as portable as Stonehedge!!"},
|
||||
{0x8c79fd79, 0x297a88ed, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"},
|
||||
{0xa20b7167, 0x66ed1d8b, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
|
||||
{0x8e0bb443, 0xdcded527, "How can you write a big system without C++? -Paul Glick"},
|
||||
}
|
||||
|
||||
func TestGolden(t *testing.T) {
|
||||
castagnoliTab := MakeTable(Castagnoli)
|
||||
|
||||
for _, g := range golden {
|
||||
ieee := NewIEEE()
|
||||
io.WriteString(ieee, g.in)
|
||||
s := ieee.Sum32()
|
||||
if s != g.ieee {
|
||||
t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, s, g.ieee)
|
||||
}
|
||||
|
||||
castagnoli := New(castagnoliTab)
|
||||
io.WriteString(castagnoli, g.in)
|
||||
s = castagnoli.Sum32()
|
||||
if s != g.castagnoli {
|
||||
t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
|
||||
}
|
||||
|
||||
if len(g.in) > 0 {
|
||||
// The SSE4.2 implementation of this has code to deal
|
||||
// with misaligned data so we ensure that we test that
|
||||
// too.
|
||||
castagnoli = New(castagnoliTab)
|
||||
io.WriteString(castagnoli, g.in[:1])
|
||||
io.WriteString(castagnoli, g.in[1:])
|
||||
s = castagnoli.Sum32()
|
||||
if s != g.castagnoli {
|
||||
t.Errorf("Castagnoli[misaligned](%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCrc40B(b *testing.B) {
|
||||
benchmark(b, NewIEEE(), 40)
|
||||
}
|
||||
|
||||
func BenchmarkStdCrc40B(b *testing.B) {
|
||||
benchmark(b, crc32.NewIEEE(), 40)
|
||||
}
|
||||
|
||||
func BenchmarkCrc1KB(b *testing.B) {
|
||||
benchmark(b, NewIEEE(), 1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCrc1KB(b *testing.B) {
|
||||
benchmark(b, crc32.NewIEEE(), 1024)
|
||||
}
|
||||
|
||||
func BenchmarkCrc8KB(b *testing.B) {
|
||||
benchmark(b, NewIEEE(), 8*1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCrc8KB(b *testing.B) {
|
||||
benchmark(b, crc32.NewIEEE(), 8*1024)
|
||||
}
|
||||
|
||||
func BenchmarkCrc32KB(b *testing.B) {
|
||||
benchmark(b, NewIEEE(), 32*1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCrc32KB(b *testing.B) {
|
||||
benchmark(b, crc32.NewIEEE(), 32*1024)
|
||||
}
|
||||
|
||||
func BenchmarkCastagnoli40B(b *testing.B) {
|
||||
benchmark(b, New(MakeTable(Castagnoli)), 40)
|
||||
}
|
||||
|
||||
func BenchmarkStdCastagnoli40B(b *testing.B) {
|
||||
benchmark(b, crc32.New(crc32.MakeTable(Castagnoli)), 40)
|
||||
}
|
||||
|
||||
func BenchmarkCastagnoli1KB(b *testing.B) {
|
||||
benchmark(b, New(MakeTable(Castagnoli)), 1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCastagnoli1KB(b *testing.B) {
|
||||
benchmark(b, crc32.New(crc32.MakeTable(Castagnoli)), 1024)
|
||||
}
|
||||
|
||||
func BenchmarkCastagnoli8KB(b *testing.B) {
|
||||
benchmark(b, New(MakeTable(Castagnoli)), 8*1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCastagnoli8KB(b *testing.B) {
|
||||
benchmark(b, crc32.New(crc32.MakeTable(Castagnoli)), 8*1024)
|
||||
}
|
||||
|
||||
func BenchmarkCastagnoli32KB(b *testing.B) {
|
||||
benchmark(b, New(MakeTable(Castagnoli)), 32*1024)
|
||||
}
|
||||
|
||||
func BenchmarkStdCastagnoli32KB(b *testing.B) {
|
||||
benchmark(b, crc32.New(crc32.MakeTable(Castagnoli)), 32*1024)
|
||||
}
|
||||
|
||||
func benchmark(b *testing.B, h hash.Hash32, n int64) {
|
||||
b.SetBytes(n)
|
||||
data := make([]byte, n)
|
||||
for i := range data {
|
||||
data[i] = byte(i)
|
||||
}
|
||||
in := make([]byte, 0, h.Size())
|
||||
|
||||
// Warm up
|
||||
h.Reset()
|
||||
h.Write(data)
|
||||
h.Sum(in)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
h.Reset()
|
||||
h.Write(data)
|
||||
h.Sum(in)
|
||||
}
|
||||
}
|
||||
28
vendor/github.com/klauspost/crc32/example_test.go
generated
vendored
Normal file
28
vendor/github.com/klauspost/crc32/example_test.go
generated
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package crc32_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
)
|
||||
|
||||
func ExampleMakeTable() {
|
||||
// In this package, the CRC polynomial is represented in reversed notation,
|
||||
// or LSB-first representation.
|
||||
//
|
||||
// LSB-first representation is a hexadecimal number with n bits, in which the
|
||||
// most significant bit represents the coefficient of x⁰ and the least significant
|
||||
// bit represents the coefficient of xⁿ⁻¹ (the coefficient for xⁿ is implicit).
|
||||
//
|
||||
// For example, CRC32-Q, as defined by the following polynomial,
|
||||
// x³²+ x³¹+ x²⁴+ x²²+ x¹⁶+ x¹⁴+ x⁸+ x⁷+ x⁵+ x³+ x¹+ x⁰
|
||||
// has the reversed notation 0b11010101100000101000001010000001, so the value
|
||||
// that should be passed to MakeTable is 0xD5828281.
|
||||
crc32q := crc32.MakeTable(0xD5828281)
|
||||
fmt.Printf("%08x\n", crc32.Checksum([]byte("Hello world"), crc32q))
|
||||
// Output:
|
||||
// 2964d064
|
||||
}
|
||||
Reference in New Issue
Block a user