~mcf/b3sum

0fa3655940ae4b2274733a018d90e74f444095e4 — Michael Forney 4 months ago cd0a671 + 0ef91b7
Merge branch 'upstream'
4 files changed, 26 insertions(+), 2 deletions(-)

M blake3.c
M blake3_avx2_x86-64_unix.S
M blake3_avx512_x86-64_unix.S
M blake3_sse41_x86-64_unix.S
M blake3.c => blake3.c +1 -1
@@ 373,7 373,7 @@ INLINE void compress_subtree_to_parent_node(
    uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
  assert(input_len > BLAKE3_CHUNK_LEN);

-  uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+  uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
  size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
                                                chunk_counter, flags, cv_array);


M blake3_avx2_x86-64_unix.S => blake3_avx2_x86-64_unix.S +7 -0
@@ 1,3 1,9 @@
#if defined(__ELF__) && defined(__CET__) && __has_include(<cet.h>)
#include <cet.h>
#else
#define _CET_ENDBR
#endif

.intel_syntax noprefix
.global _blake3_hash_many_avx2
.global blake3_hash_many_avx2


@@ 9,6 15,7 @@
        .p2align  6
_blake3_hash_many_avx2:
blake3_hash_many_avx2:
        _CET_ENDBR
        push    r15
        push    r14
        push    r13

M blake3_avx512_x86-64_unix.S => blake3_avx512_x86-64_unix.S +9 -1
@@ 1,5 1,10 @@
-.intel_syntax noprefix
+#if defined(__ELF__) && defined(__CET__) && __has_include(<cet.h>)
+#include <cet.h>
+#else
+#define _CET_ENDBR
+#endif
+
.intel_syntax noprefix
.global _blake3_hash_many_avx512
.global blake3_hash_many_avx512
.global blake3_compress_in_place_avx512


@@ 15,6 20,7 @@
.p2align  6
_blake3_hash_many_avx512:
blake3_hash_many_avx512:
        _CET_ENDBR
        push    r15
        push    r14
        push    r13


@@ 2372,6 2378,7 @@ blake3_hash_many_avx512:
.p2align 6
_blake3_compress_in_place_avx512:
blake3_compress_in_place_avx512:
        _CET_ENDBR
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+0x10]
        movzx   eax, r8b


@@ 2454,6 2461,7 @@ blake3_compress_in_place_avx512:
.p2align 6
_blake3_compress_xof_avx512:
blake3_compress_xof_avx512:
        _CET_ENDBR
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+0x10]
        movzx   eax, r8b

M blake3_sse41_x86-64_unix.S => blake3_sse41_x86-64_unix.S +9 -0
@@ 1,3 1,9 @@
#if defined(__ELF__) && defined(__CET__) && __has_include(<cet.h>)
#include <cet.h>
#else
#define _CET_ENDBR
#endif

.intel_syntax noprefix
.global blake3_hash_many_sse41
.global _blake3_hash_many_sse41


@@ 13,6 19,7 @@
        .p2align  6
_blake3_hash_many_sse41:
blake3_hash_many_sse41:
        _CET_ENDBR
        push    r15
        push    r14
        push    r13


@@ 1774,6 1781,7 @@ blake3_hash_many_sse41:
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]


@@ 1874,6 1882,7 @@ _blake3_compress_in_place_sse41:
.p2align 6
blake3_compress_xof_sse41:
_blake3_compress_xof_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]