Quellcodebibliothek Statistik Leitseite products/sources/formale Sprachen/C/Linux/lib/raid6/   (Open Source Betriebssystem Version 6.17.9©)  Datei vom 24.10.2025 mit Größe 9 kB image not shown  

Quelle  recov_avx512.c   Sprache: C

 
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */


#include <linux/raid/pq.h>
#include "x86.h"

/*
 * Gate for this implementation: require AVX/AVX2 plus every AVX-512
 * subset the recovery routines use (F, BW, VL, DQ).  Returns nonzero
 * when all features are present.
 */
static int raid6_has_avx512(void)
{
 if (!boot_cpu_has(X86_FEATURE_AVX2))
  return 0;
 if (!boot_cpu_has(X86_FEATURE_AVX))
  return 0;
 if (!boot_cpu_has(X86_FEATURE_AVX512F))
  return 0;
 if (!boot_cpu_has(X86_FEATURE_AVX512BW))
  return 0;
 if (!boot_cpu_has(X86_FEATURE_AVX512VL))
  return 0;
 return boot_cpu_has(X86_FEATURE_AVX512DQ) != 0;
}

/*
 * raid6_2data_recov_avx512() - recover two failed data blocks (D+D case).
 * @disks: number of member disks including parity; P lives at
 *         ptrs[disks-2] and Q at ptrs[disks-1]
 * @bytes: size of each block; consumed in 128-byte (CONFIG_X86_64) or
 *         64-byte steps, so it must be a multiple of that stride
 * @faila: index of the first failed data disk
 * @failb: index of the second failed data disk
 * @ptrs:  table of block pointers, data blocks first, then P, then Q
 *
 * Rerun the syndrome generator with the failed blocks replaced by the
 * zero page to obtain delta-P and delta-Q in the dead pages, then combine
 * the deltas through GF(2^8) lookup tables to rebuild both blocks in
 * place.  All loads/stores use vmovdqa64, so every block pointer is
 * assumed to be 64-byte aligned.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
         int failb, void **ptrs)
{
 u8 *p, *q, *dp, *dq;
 const u8 *pbmul; /* P multiplier table for B data */
 const u8 *qmul;  /* Q multiplier table (for both) */
 const u8 x0f = 0x0f;

 p = (u8 *)ptrs[disks-2];
 q = (u8 *)ptrs[disks-1];

 /*
  * Compute the syndrome with zero for the missing data pages.
  * Use the dead data pages as temporary storage for
  * delta p and delta q.
  */


 dp = (u8 *)ptrs[faila];
 ptrs[faila] = raid6_get_zero_page();
 ptrs[disks-2] = dp;
 dq = (u8 *)ptrs[failb];
 ptrs[failb] = raid6_get_zero_page();
 ptrs[disks-1] = dq;

 raid6_call.gen_syndrome(disks, bytes, ptrs);

 /* Restore pointer table */
 ptrs[faila]   = dp;
 ptrs[failb]   = dq;
 ptrs[disks-2] = p;
 ptrs[disks-1] = q;

 /*
  * Now, pick the proper data tables.  Each raid6_vgfmul[] entry is a
  * pair of 16-byte nibble lookup tables consumed by vpshufb below.
  */
 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
 qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
  raid6_gfexp[failb]]];

 kernel_fpu_begin();

 /* zmm7 = 64 copies of 0x0f: low-nibble mask for the vpshufb lookups */
 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

 /*
  * NOTE(review): the asm blocks below name their store targets (dq[],
  * dp[]) as "m" inputs rather than outputs and rely on the volatile
  * qualifier for ordering -- matches the style of the other raid6
  * recov_* variants, but confirm before touching the constraints.
  */
 while (bytes) {
#ifdef CONFIG_X86_64
  /* Two 64-byte lanes per iteration (offsets 0 and 64). */
  asm volatile("vmovdqa64 %0, %%zmm1\n\t"
        "vmovdqa64 %1, %%zmm9\n\t"
        "vmovdqa64 %2, %%zmm0\n\t"
        "vmovdqa64 %3, %%zmm8\n\t"
        "vpxorq %4, %%zmm1, %%zmm1\n\t"
        "vpxorq %5, %%zmm9, %%zmm9\n\t"
        "vpxorq %6, %%zmm0, %%zmm0\n\t"
        "vpxorq %7, %%zmm8, %%zmm8"
        :
        : "m" (q[0]), "m" (q[64]), "m" (p[0]),
          "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
          "m" (dp[0]), "m" (dp[64]));

  /*
   * zmm1 = dq[0]  ^ q[0]
   * zmm9 = dq[64] ^ q[64]
   * zmm0 = dp[0]  ^ p[0]
   * zmm8 = dp[64] ^ p[64]
   */

  /* Broadcast the two 16-byte qmul nibble tables to all lanes. */
  asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
        "vbroadcasti64x2 %1, %%zmm5"
        :
        : "m" (qmul[0]), "m" (qmul[16]));

  /*
   * GF-multiply the Q deltas by qmul: split each byte into nibbles,
   * look both up with vpshufb, XOR the partial products.
   */
  asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
        "vpsraw $4, %%zmm9, %%zmm12\n\t"
        "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
        "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
        "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
        "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
        "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
        "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
        "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
        "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
        "vpxorq %%zmm4, %%zmm5, %%zmm5"
        :
        : );

  /*
   * zmm5  = qx[0]
   * zmm15 = qx[64]
   */

  /* Same nibble-table multiply of the P deltas by pbmul. */
  asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
        "vbroadcasti64x2 %1, %%zmm1\n\t"
        "vpsraw $4, %%zmm0, %%zmm2\n\t"
        "vpsraw $4, %%zmm8, %%zmm6\n\t"
        "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
        "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
        "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
        "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
        "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
        "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
        "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm12, %%zmm13, %%zmm13"
        :
        : "m" (pbmul[0]), "m" (pbmul[16]));

  /*
   * zmm1  = pbmul[px[0]]
   * zmm13 = pbmul[px[64]]
   */

  asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm15, %%zmm13, %%zmm13"
        :
        : );

  /*
   * zmm1  = db = DQ
   * zmm13 = db[64] = DQ[64]
   */

  /* Store rebuilt block B; the other block is px ^ DQ. */
  asm volatile("vmovdqa64 %%zmm1, %0\n\t"
        "vmovdqa64 %%zmm13,%1\n\t"
        "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
        "vpxorq %%zmm13, %%zmm8, %%zmm8"
        :
        : "m" (dq[0]), "m" (dq[64]));

  /* Store rebuilt block A. */
  asm volatile("vmovdqa64 %%zmm0, %0\n\t"
        "vmovdqa64 %%zmm8, %1"
        :
        : "m" (dp[0]), "m" (dp[64]));

  bytes -= 128;
  p += 128;
  q += 128;
  dp += 128;
  dq += 128;
#else
  /* 32-bit build: single 64-byte lane per iteration. */
  asm volatile("vmovdqa64 %0, %%zmm1\n\t"
        "vmovdqa64 %1, %%zmm0\n\t"
        "vpxorq %2, %%zmm1, %%zmm1\n\t"
        "vpxorq %3, %%zmm0, %%zmm0"
        :
        : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

  /* zmm1 = dq ^ q;  zmm0 = dp ^ p */

  /* Broadcast the two 16-byte qmul nibble tables to all lanes. */
  asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
        "vbroadcasti64x2 %1, %%zmm5"
        :
        : "m" (qmul[0]), "m" (qmul[16]));

  /* Nibble-table multiply of the Q delta by qmul. */
  asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
        "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
        "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
        "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
        "vpxorq %%zmm4, %%zmm5, %%zmm5"
        :
        : );

  /* zmm5 = qx */

  asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
        "vbroadcasti64x2 %1, %%zmm1"
        :
        : "m" (pbmul[0]), "m" (pbmul[16]));

  /* Nibble-table multiply of the P delta by pbmul. */
  asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
        "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
        "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
        "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm4, %%zmm1, %%zmm1"
        :
        : );

  /* zmm1 = pbmul[px] */
  asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
        /* zmm1 = db = DQ; store rebuilt block B */
        "vmovdqa64 %%zmm1, %0\n\t"
        :
        : "m" (dq[0]));

  /* The other block is px ^ DQ; store rebuilt block A. */
  asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
        "vmovdqa64 %%zmm0, %0"
        :
        : "m" (dp[0]));

  bytes -= 64;
  p += 64;
  q += 64;
  dp += 64;
  dq += 64;
#endif
 }

 kernel_fpu_end();
}

/*
 * raid6_datap_recov_avx512() - recover one data block plus P (D+P case).
 * @disks: number of member disks including parity; P lives at
 *         ptrs[disks-2] and Q at ptrs[disks-1]
 * @bytes: size of each block; consumed in 128-byte (CONFIG_X86_64) or
 *         64-byte steps, so it must be a multiple of that stride
 * @faila: index of the failed data disk
 * @ptrs:  table of block pointers, data blocks first, then P, then Q
 *
 * Regenerate Q with the failed block zeroed to obtain delta-Q, multiply
 * it by the qmul table to rebuild the data block, then finish P by
 * XOR-ing the rebuilt data into the freshly generated P.  vmovdqa64 is
 * used throughout, so all block pointers are assumed 64-byte aligned.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
         void **ptrs)
{
 u8 *p, *q, *dq;
 const u8 *qmul;  /* Q multiplier table */
 const u8 x0f = 0x0f;

 p = (u8 *)ptrs[disks-2];
 q = (u8 *)ptrs[disks-1];

 /*
  * Compute the syndrome with zero for the missing data page.
  * Use the dead data page as temporary storage for delta q.
  * Note that p[] is regenerated by gen_syndrome() as well.
  */


 dq = (u8 *)ptrs[faila];
 ptrs[faila] = raid6_get_zero_page();
 ptrs[disks-1] = dq;

 raid6_call.gen_syndrome(disks, bytes, ptrs);

 /* Restore pointer table */
 ptrs[faila]   = dq;
 ptrs[disks-1] = q;

 /*
  * Now, pick the proper data tables.  The raid6_vgfmul[] entry is a
  * pair of 16-byte nibble lookup tables consumed by vpshufb below.
  */
 qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

 kernel_fpu_begin();

 /* zmm7 = 64 copies of 0x0f: low-nibble mask for the vpshufb lookups */
 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

 /*
  * NOTE(review): the asm blocks below name their store targets (dq[],
  * p[]) as "m" inputs rather than outputs and rely on the volatile
  * qualifier for ordering -- matches the style of the other raid6
  * recov_* variants, but confirm before touching the constraints.
  */
 while (bytes) {
#ifdef CONFIG_X86_64
  /* Two 64-byte lanes per iteration (offsets 0 and 64). */
  asm volatile("vmovdqa64 %0, %%zmm3\n\t"
        "vmovdqa64 %1, %%zmm8\n\t"
        "vpxorq %2, %%zmm3, %%zmm3\n\t"
        "vpxorq %3, %%zmm8, %%zmm8"
        :
        : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
          "m" (q[64]));

  /*
   * zmm3 = q[0]  ^ dq[0]
   * zmm8 = q[64] ^ dq[64]
   */

  /* Broadcast the qmul nibble tables; keep copies for the second lane. */
  asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
        "vmovapd %%zmm0, %%zmm13\n\t"
        "vbroadcasti64x2 %1, %%zmm1\n\t"
        "vmovapd %%zmm1, %%zmm14"
        :
        : "m" (qmul[0]), "m" (qmul[16]));

  /*
   * GF-multiply the Q deltas by qmul: split each byte into nibbles,
   * look both up with vpshufb, XOR the partial products.
   */
  asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
        "vpsraw $4, %%zmm8, %%zmm12\n\t"
        "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
        "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
        "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
        "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
        "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
        "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
        "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
        "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm13, %%zmm14, %%zmm14"
        :
        : );

  /*
   * zmm1  = qmul[q[0]  ^ dq[0]]
   * zmm14 = qmul[q[64] ^ dq[64]]
   */

  /* Fold the rebuilt data into P. */
  asm volatile("vmovdqa64 %0, %%zmm2\n\t"
        "vmovdqa64 %1, %%zmm12\n\t"
        "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
        "vpxorq %%zmm14, %%zmm12, %%zmm12"
        :
        : "m" (p[0]), "m" (p[64]));

  /*
   * zmm2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
   * zmm12 = p[64] ^ qmul[q[64] ^ dq[64]]
   */

  /* Store the rebuilt data block and the corrected P block. */
  asm volatile("vmovdqa64 %%zmm1, %0\n\t"
        "vmovdqa64 %%zmm14, %1\n\t"
        "vmovdqa64 %%zmm2, %2\n\t"
        "vmovdqa64 %%zmm12,%3"
        :
        : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
          "m" (p[64]));

  bytes -= 128;
  p += 128;
  q += 128;
  dq += 128;
#else
  /* 32-bit build: single 64-byte lane per iteration. */
  asm volatile("vmovdqa64 %0, %%zmm3\n\t"
        "vpxorq %1, %%zmm3, %%zmm3"
        :
        : "m" (dq[0]), "m" (q[0]));

  /* zmm3 = q ^ dq */

  /* Broadcast the two 16-byte qmul nibble tables to all lanes. */
  asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
        "vbroadcasti64x2 %1, %%zmm1"
        :
        : "m" (qmul[0]), "m" (qmul[16]));

  /* Nibble-table multiply of the Q delta by qmul. */
  asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
        "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
        "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
        "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
        "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
        "vpxorq %%zmm0, %%zmm1, %%zmm1"
        :
        : );

  /* zmm1 = qmul[q ^ dq] */

  /* Fold the rebuilt data into P. */
  asm volatile("vmovdqa64 %0, %%zmm2\n\t"
        "vpxorq %%zmm1, %%zmm2, %%zmm2"
        :
        : "m" (p[0]));

  /* zmm2 = p ^ qmul[q ^ dq] */

  /* Store the rebuilt data block and the corrected P block. */
  asm volatile("vmovdqa64 %%zmm1, %0\n\t"
        "vmovdqa64 %%zmm2, %1"
        :
        : "m" (dq[0]), "m" (p[0]));

  bytes -= 64;
  p += 64;
  q += 64;
  dq += 64;
#endif
 }

 kernel_fpu_end();
}

/*
 * Descriptor exported to the raid6 core: the two recovery entry points,
 * the CPU-capability gate, and the algorithm name/priority.
 * NOTE(review): priority 3 presumably ranks this above the AVX2/SSSE3
 * recovery variants when .valid succeeds -- confirm in the raid6 core.
 */
const struct raid6_recov_calls raid6_recov_avx512 = {
 .data2 = raid6_2data_recov_avx512,
 .datap = raid6_datap_recov_avx512,
 .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
 .name = "avx512x2", /* 64-bit build: two 64-byte lanes per loop */
#else
 .name = "avx512x1", /* 32-bit build: one 64-byte lane per loop */
#endif
 .priority = 3,
};

Messung V0.5
C=94 H=27 G=68

¤ Dauer der Verarbeitung: 0.1 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.