| author | 2025-03-09 17:47:56 +0100 |
|---|---|
| committer | 2025-12-01 22:08:04 +0100 |
| commit | b1af8fd87760b34e3ff2fd3bda38f211815a0473 (patch) |
| tree | 9317fad1a7ec298d7a8d2678e4e422953bbc6f33 /vendor/github.com/klauspost/crc32/crc32_amd64.s |
| parent | [chore] update URLs to forked source (diff) |
| download | gotosocial-b1af8fd87760b34e3ff2fd3bda38f211815a0473.tar.xz |
[chore] remove vendor
Diffstat (limited to 'vendor/github.com/klauspost/crc32/crc32_amd64.s')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | vendor/github.com/klauspost/crc32/crc32_amd64.s | 527 |
1 file changed, 0 insertions, 527 deletions
diff --git a/vendor/github.com/klauspost/crc32/crc32_amd64.s b/vendor/github.com/klauspost/crc32/crc32_amd64.s
deleted file mode 100644
index e2de3a5cb..000000000
--- a/vendor/github.com/klauspost/crc32/crc32_amd64.s
+++ /dev/null
@@ -1,527 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// castagnoliSSE42 updates the (non-inverted) crc with the given buffer.
-//
-// func castagnoliSSE42(crc uint32, p []byte) uint32
-TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
-    MOVL crc+0(FP), AX      // CRC value
-    MOVQ p+8(FP), SI        // data pointer
-    MOVQ p_len+16(FP), CX   // len(p)
-
-    // If there are fewer than 8 bytes to process, skip alignment.
-    CMPQ CX, $8
-    JL less_than_8
-
-    MOVQ SI, BX
-    ANDQ $7, BX
-    JZ aligned
-
-    // Process the first few bytes to 8-byte align the input.
-
-    // BX = 8 - BX. We need to process this many bytes to align.
-    SUBQ $1, BX
-    XORQ $7, BX
-
-    BTQ $0, BX
-    JNC align_2
-
-    CRC32B (SI), AX
-    DECQ CX
-    INCQ SI
-
-align_2:
-    BTQ $1, BX
-    JNC align_4
-
-    CRC32W (SI), AX
-
-    SUBQ $2, CX
-    ADDQ $2, SI
-
-align_4:
-    BTQ $2, BX
-    JNC aligned
-
-    CRC32L (SI), AX
-
-    SUBQ $4, CX
-    ADDQ $4, SI
-
-aligned:
-    // The input is now 8-byte aligned and we can process 8-byte chunks.
-    CMPQ CX, $8
-    JL less_than_8
-
-    CRC32Q (SI), AX
-    ADDQ $8, SI
-    SUBQ $8, CX
-    JMP aligned
-
-less_than_8:
-    // We may have some bytes left over; process 4 bytes, then 2, then 1.
-    BTQ $2, CX
-    JNC less_than_4
-
-    CRC32L (SI), AX
-    ADDQ $4, SI
-
-less_than_4:
-    BTQ $1, CX
-    JNC less_than_2
-
-    CRC32W (SI), AX
-    ADDQ $2, SI
-
-less_than_2:
-    BTQ $0, CX
-    JNC done
-
-    CRC32B (SI), AX
-
-done:
-    MOVL AX, ret+32(FP)
-    RET
-
-// castagnoliSSE42Triple updates three (non-inverted) crcs with (24*rounds)
-// bytes from each buffer.
-//
-// func castagnoliSSE42Triple(
-//     crc1, crc2, crc3 uint32,
-//     a, b, c []byte,
-//     rounds uint32,
-// ) (retA uint32, retB uint32, retC uint32)
-TEXT ·castagnoliSSE42Triple(SB), NOSPLIT, $0
-    MOVL crcA+0(FP), AX
-    MOVL crcB+4(FP), CX
-    MOVL crcC+8(FP), DX
-
-    MOVQ a+16(FP), R8    // data pointer
-    MOVQ b+40(FP), R9    // data pointer
-    MOVQ c+64(FP), R10   // data pointer
-
-    MOVL rounds+88(FP), R11
-
-loop:
-    CRC32Q (R8), AX
-    CRC32Q (R9), CX
-    CRC32Q (R10), DX
-
-    CRC32Q 8(R8), AX
-    CRC32Q 8(R9), CX
-    CRC32Q 8(R10), DX
-
-    CRC32Q 16(R8), AX
-    CRC32Q 16(R9), CX
-    CRC32Q 16(R10), DX
-
-    ADDQ $24, R8
-    ADDQ $24, R9
-    ADDQ $24, R10
-
-    DECQ R11
-    JNZ loop
-
-    MOVL AX, retA+96(FP)
-    MOVL CX, retB+100(FP)
-    MOVL DX, retC+104(FP)
-    RET
-
-// CRC32 polynomial data
-//
-// These constants are lifted from the
-// Linux kernel, since they avoid the costly
-// PSHUFB 16 byte reversal proposed in the
-// original Intel paper.
-DATA r2r1<>+0(SB)/8, $0x154442bd4
-DATA r2r1<>+8(SB)/8, $0x1c6e41596
-DATA r4r3<>+0(SB)/8, $0x1751997d0
-DATA r4r3<>+8(SB)/8, $0x0ccaa009e
-DATA rupoly<>+0(SB)/8, $0x1db710641
-DATA rupoly<>+8(SB)/8, $0x1f7011641
-DATA r5<>+0(SB)/8, $0x163cd6124
-
-GLOBL r2r1<>(SB), RODATA, $16
-GLOBL r4r3<>(SB), RODATA, $16
-GLOBL rupoly<>(SB), RODATA, $16
-GLOBL r5<>(SB), RODATA, $8
-
-// Based on https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-// len(p) must be at least 64, and must be a multiple of 16.
-
-// func ieeeCLMUL(crc uint32, p []byte) uint32
-TEXT ·ieeeCLMUL(SB), NOSPLIT, $0
-    MOVL crc+0(FP), X0      // Initial CRC value
-    MOVQ p+8(FP), SI        // data pointer
-    MOVQ p_len+16(FP), CX   // len(p)
-
-    MOVOU (SI), X1
-    MOVOU 16(SI), X2
-    MOVOU 32(SI), X3
-    MOVOU 48(SI), X4
-    PXOR X0, X1
-    ADDQ $64, SI    // buf+=64
-    SUBQ $64, CX    // len-=64
-    CMPQ CX, $64    // Less than 64 bytes left
-    JB remain64
-
-    MOVOA r2r1<>+0(SB), X0
-
-loopback64:
-    MOVOA X1, X5
-    MOVOA X2, X6
-    MOVOA X3, X7
-    MOVOA X4, X8
-
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0, X0, X2
-    PCLMULQDQ $0, X0, X3
-    PCLMULQDQ $0, X0, X4
-
-    // Load next early
-    MOVOU (SI), X11
-    MOVOU 16(SI), X12
-    MOVOU 32(SI), X13
-    MOVOU 48(SI), X14
-
-    PCLMULQDQ $0x11, X0, X5
-    PCLMULQDQ $0x11, X0, X6
-    PCLMULQDQ $0x11, X0, X7
-    PCLMULQDQ $0x11, X0, X8
-
-    PXOR X5, X1
-    PXOR X6, X2
-    PXOR X7, X3
-    PXOR X8, X4
-
-    PXOR X11, X1
-    PXOR X12, X2
-    PXOR X13, X3
-    PXOR X14, X4
-
-    ADDQ $0x40, DI
-    ADDQ $64, SI    // buf+=64
-    SUBQ $64, CX    // len-=64
-    CMPQ CX, $64    // Less than 64 bytes left?
-    JGE loopback64
-
-    // Fold result into a single register (X1)
-remain64:
-    MOVOA r4r3<>+0(SB), X0
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X2, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X3, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X4, X1
-
-    // If there is less than 16 bytes left we are done
-    CMPQ CX, $16
-    JB finish
-
-    // Encode 16 bytes
-remain16:
-    MOVOU (SI), X10
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X10, X1
-    SUBQ $16, CX
-    ADDQ $16, SI
-    CMPQ CX, $16
-    JGE remain16
-
-finish:
-    // Fold final result into 32 bits and return it
-    PCMPEQB X3, X3
-    PCLMULQDQ $1, X1, X0
-    PSRLDQ $8, X1
-    PXOR X0, X1
-
-    MOVOA X1, X2
-    MOVQ r5<>+0(SB), X0
-
-    // Creates 32 bit mask. Note that we don't care about upper half.
-    PSRLQ $32, X3
-
-    PSRLDQ $4, X2
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    MOVOA rupoly<>+0(SB), X0
-
-    MOVOA X1, X2
-    PAND X3, X1
-    PCLMULQDQ $0x10, X0, X1
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    PEXTRD $1, X1, AX
-    MOVL AX, ret+32(FP)
-
-    RET
-
-DATA r2r1X<>+0(SB)/8, $0x154442bd4
-DATA r2r1X<>+8(SB)/8, $0x1c6e41596
-DATA r2r1X<>+16(SB)/8, $0x154442bd4
-DATA r2r1X<>+24(SB)/8, $0x1c6e41596
-DATA r2r1X<>+32(SB)/8, $0x154442bd4
-DATA r2r1X<>+40(SB)/8, $0x1c6e41596
-DATA r2r1X<>+48(SB)/8, $0x154442bd4
-DATA r2r1X<>+56(SB)/8, $0x1c6e41596
-GLOBL r2r1X<>(SB), RODATA, $64
-
-// Based on https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-// len(p) must be at least 128, and must be a multiple of 16.
-
-// func ieeeCLMULAvx512(crc uint32, p []byte) uint32
-TEXT ·ieeeCLMULAvx512(SB), NOSPLIT, $0
-    MOVL crc+0(FP), AX      // Initial CRC value
-    MOVQ p+8(FP), SI        // data pointer
-    MOVQ p_len+16(FP), CX   // len(p)
-
-    VPXORQ Z0, Z0, Z0
-    VMOVDQU64 (SI), Z1
-    VMOVQ AX, X0
-    VPXORQ Z0, Z1, Z1   // Merge initial CRC value into Z1
-    ADDQ $64, SI        // buf+=64
-    SUBQ $64, CX        // len-=64
-
-    VMOVDQU64 r2r1X<>+0(SB), Z0
-
-loopback64:
-    // Load next early
-    VMOVDQU64 (SI), Z11
-
-    VPCLMULQDQ $0x11, Z0, Z1, Z5
-    VPCLMULQDQ $0, Z0, Z1, Z1
-
-    VPTERNLOGD $0x96, Z11, Z5, Z1   // Combine results with xor into Z1
-
-    ADDQ $0x40, DI
-    ADDQ $64, SI    // buf+=64
-    SUBQ $64, CX    // len-=64
-    CMPQ CX, $64    // Less than 64 bytes left?
-    JGE loopback64
-
-    // Fold result into a single register (X1)
-remain64:
-    VEXTRACTF32X4 $1, Z1, X2   // X2: Second 128-bit lane
-    VEXTRACTF32X4 $2, Z1, X3   // X3: Third 128-bit lane
-    VEXTRACTF32X4 $3, Z1, X4   // X4: Fourth 128-bit lane
-
-    MOVOA r4r3<>+0(SB), X0
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X2, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X3, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X4, X1
-
-    // If there is less than 16 bytes left we are done
-    CMPQ CX, $16
-    JB finish
-
-    // Encode 16 bytes
-remain16:
-    MOVOU (SI), X10
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X10, X1
-    SUBQ $16, CX
-    ADDQ $16, SI
-    CMPQ CX, $16
-    JGE remain16
-
-finish:
-    // Fold final result into 32 bits and return it
-    PCMPEQB X3, X3
-    PCLMULQDQ $1, X1, X0
-    PSRLDQ $8, X1
-    PXOR X0, X1
-
-    MOVOA X1, X2
-    MOVQ r5<>+0(SB), X0
-
-    // Creates 32 bit mask. Note that we don't care about upper half.
-    PSRLQ $32, X3
-
-    PSRLDQ $4, X2
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    MOVOA rupoly<>+0(SB), X0
-
-    MOVOA X1, X2
-    PAND X3, X1
-    PCLMULQDQ $0x10, X0, X1
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    PEXTRD $1, X1, AX
-    MOVL AX, ret+32(FP)
-    VZEROUPPER
-    RET
-
-// Castagonli Polynomial constants
-DATA r2r1C<>+0(SB)/8, $0x0740eef02
-DATA r2r1C<>+8(SB)/8, $0x09e4addf8
-DATA r2r1C<>+16(SB)/8, $0x0740eef02
-DATA r2r1C<>+24(SB)/8, $0x09e4addf8
-DATA r2r1C<>+32(SB)/8, $0x0740eef02
-DATA r2r1C<>+40(SB)/8, $0x09e4addf8
-DATA r2r1C<>+48(SB)/8, $0x0740eef02
-DATA r2r1C<>+56(SB)/8, $0x09e4addf8
-GLOBL r2r1C<>(SB), RODATA, $64
-
-DATA r4r3C<>+0(SB)/8, $0xf20c0dfe
-DATA r4r3C<>+8(SB)/8, $0x14cd00bd6
-DATA rupolyC<>+0(SB)/8, $0x105ec76f0
-DATA rupolyC<>+8(SB)/8, $0xdea713f1
-DATA r5C<>+0(SB)/8, $0xdd45aab8
-
-GLOBL r4r3C<>(SB), RODATA, $16
-GLOBL rupolyC<>(SB), RODATA, $16
-GLOBL r5C<>(SB), RODATA, $8
-
-// Based on https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-// len(p) must be at least 128, and must be a multiple of 16.
-
-// func castagnoliCLMULAvx512(crc uint32, p []byte) uint32
-TEXT ·castagnoliCLMULAvx512(SB), NOSPLIT, $0
-    MOVL crc+0(FP), AX      // Initial CRC value
-    MOVQ p+8(FP), SI        // data pointer
-    MOVQ p_len+16(FP), CX   // len(p)
-
-    VPXORQ Z0, Z0, Z0
-    VMOVDQU64 (SI), Z1
-    VMOVQ AX, X0
-    VPXORQ Z0, Z1, Z1   // Merge initial CRC value into Z1
-    ADDQ $64, SI        // buf+=64
-    SUBQ $64, CX        // len-=64
-
-    VMOVDQU64 r2r1C<>+0(SB), Z0
-
-loopback64:
-    // Load next early
-    VMOVDQU64 (SI), Z11
-
-    VPCLMULQDQ $0x11, Z0, Z1, Z5
-    VPCLMULQDQ $0, Z0, Z1, Z1
-
-    VPTERNLOGD $0x96, Z11, Z5, Z1   // Combine results with xor into Z1
-
-    ADDQ $0x40, DI
-    ADDQ $64, SI    // buf+=64
-    SUBQ $64, CX    // len-=64
-    CMPQ CX, $64    // Less than 64 bytes left?
-    JGE loopback64
-
-    // Fold result into a single register (X1)
-remain64:
-    VEXTRACTF32X4 $1, Z1, X2   // X2: Second 128-bit lane
-    VEXTRACTF32X4 $2, Z1, X3   // X3: Third 128-bit lane
-    VEXTRACTF32X4 $3, Z1, X4   // X4: Fourth 128-bit lane
-
-    MOVOA r4r3C<>+0(SB), X0
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X2, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X3, X1
-
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X4, X1
-
-    // If there is less than 16 bytes left we are done
-    CMPQ CX, $16
-    JB finish
-
-    // Encode 16 bytes
-remain16:
-    MOVOU (SI), X10
-    MOVOA X1, X5
-    PCLMULQDQ $0, X0, X1
-    PCLMULQDQ $0x11, X0, X5
-    PXOR X5, X1
-    PXOR X10, X1
-    SUBQ $16, CX
-    ADDQ $16, SI
-    CMPQ CX, $16
-    JGE remain16
-
-finish:
-    // Fold final result into 32 bits and return it
-    PCMPEQB X3, X3
-    PCLMULQDQ $1, X1, X0
-    PSRLDQ $8, X1
-    PXOR X0, X1
-
-    MOVOA X1, X2
-    MOVQ r5C<>+0(SB), X0
-
-    // Creates 32 bit mask. Note that we don't care about upper half.
-    PSRLQ $32, X3
-
-    PSRLDQ $4, X2
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    MOVOA rupolyC<>+0(SB), X0
-
-    MOVOA X1, X2
-    PAND X3, X1
-    PCLMULQDQ $0x10, X0, X1
-    PAND X3, X1
-    PCLMULQDQ $0, X0, X1
-    PXOR X2, X1
-
-    PEXTRD $1, X1, AX
-    MOVL AX, ret+32(FP)
-    VZEROUPPER
-    RET
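For orientation, the removed assembly exposes plain Go stubs such as `func castagnoliSSE42(crc uint32, p []byte) uint32`, which, per the file's own comment, update a non-inverted CRC. Below is a minimal, hypothetical sketch of how such a stub is usually declared and wrapped on the Go side. The package name `crc32c`, the `hasSSE42` flag, and the `hash/crc32` fallback are illustrative assumptions, not the deleted package's actual wrapper code, and the stub only links when paired with the assembly file.

```go
// Hypothetical Go-side wrapper for the removed castagnoliSSE42 routine.
package crc32c

import "hash/crc32"

// Implemented in crc32_amd64.s; signature taken from the deleted file.
// It updates a *non-inverted* CRC, so the caller handles the usual
// pre/post inversion.
//
//go:noescape
func castagnoliSSE42(crc uint32, p []byte) uint32

var (
	hasSSE42        = false // assumed: set via CPU feature detection at init
	castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
)

// Checksum returns the CRC-32C of p, preferring the SSE4.2 path when available.
func Checksum(p []byte) uint32 {
	if hasSSE42 {
		return ^castagnoliSSE42(^uint32(0), p) // pre- and post-invert around the asm core
	}
	return crc32.Update(0, castagnoliTable, p) // portable fallback; handles inversion itself
}
```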
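The three-buffer variant `castagnoliSSE42Triple` advances three independent CRC streams by 24*rounds bytes each, three `CRC32Q` chains per iteration, which hides the instruction's latency. Here is a hedged usage sketch in another file of the same illustrative package; it assumes three equally sized buffers whose length is a multiple of 24, which is a simplification of what a real caller would do.

```go
// Another file of the same illustrative crc32c package.
package crc32c

// Stub implemented in crc32_amd64.s; signature from the deleted file's comment.
//
//go:noescape
func castagnoliSSE42Triple(crcA, crcB, crcC uint32, a, b, c []byte, rounds uint32) (retA, retB, retC uint32)

// checksum3 computes CRC-32C over three equally sized buffers in one pass.
// Simplified sketch: assumes len(a) == len(b) == len(c) and a length that is
// a multiple of 24; a real caller would finish any tail with the
// single-stream routine.
func checksum3(a, b, c []byte) (uint32, uint32, uint32) {
	rounds := uint32(len(a) / 24) // each round consumes 24 bytes per buffer
	ca, cb, cc := castagnoliSSE42Triple(^uint32(0), ^uint32(0), ^uint32(0), a, b, c, rounds)
	return ^ca, ^cb, ^cc
}
```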
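The comment above `ieeeCLMUL` requires at least 64 bytes and a length that is a multiple of 16, so a dispatcher has to peel off the tail and finish it with a table-driven update. The sketch below shows one plausible shape for that logic; the `hasCLMUL` flag, the use of `hash/crc32` for the tail, and the pre/post inversion (inferred by analogy with `castagnoliSSE42`'s documented non-inverted convention) are assumptions.

```go
// Another file of the same illustrative crc32c package.
package crc32c

import "hash/crc32"

// Stub implemented in crc32_amd64.s; signature from the deleted file.
//
//go:noescape
func ieeeCLMUL(crc uint32, p []byte) uint32

var hasCLMUL = false // assumed: SSE4.2 + PCLMULQDQ detected at init

// ieeeUpdate adds p to an IEEE CRC-32. The CLMUL path is only used for a
// prefix that satisfies the documented constraints: at least 64 bytes and a
// multiple of 16.
func ieeeUpdate(crc uint32, p []byte) uint32 {
	if hasCLMUL && len(p) >= 64 {
		body := p[:len(p)&^15] // largest multiple-of-16 prefix (>= 64 here)
		// Pre/post inversion assumed, by analogy with castagnoliSSE42.
		crc = ^ieeeCLMUL(^crc, body)
		p = p[len(body):]
	}
	if len(p) > 0 {
		crc = crc32.Update(crc, crc32.IEEETable, p) // finish the tail in Go
	}
	return crc
}
```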
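Finally, choosing between the SSE4.2, 128-bit PCLMULQDQ, and AVX-512 (`*Avx512`) variants is a CPU-feature decision made once at startup. The deleted vendored package shipped its own detection; as a rough, hedged equivalent one could use golang.org/x/sys/cpu. The AVX-512 check below is deliberately simplified and omits the VPCLMULQDQ/VL sub-feature tests a production dispatcher would need.

```go
// Hypothetical feature detection for the flags used in the sketches above,
// in yet another file of the same illustrative crc32c package.
package crc32c

import "golang.org/x/sys/cpu"

var hasAVX512 bool // gate for the *Avx512 routines

func init() {
	hasSSE42 = cpu.X86.HasSSE42                 // CRC32B/W/L/Q path
	hasCLMUL = hasSSE42 && cpu.X86.HasPCLMULQDQ // 128-bit PCLMULQDQ folding path
	hasAVX512 = hasCLMUL && cpu.X86.HasAVX512F  // ZMM VPCLMULQDQ path (simplified check)
}
```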
