LCOV - code coverage report
Current view: top level - src/crypto/x11/arm_crypto - shavite.cpp (source / functions) Hit Total Coverage
Test: total_coverage.info Lines: 127 128 99.2 %
Date: 2026-06-25 07:23:43 Functions: 2 2 100.0 %

          Line data    Source code
       1             : // Copyright (c) 2025 The Dash Core developers
       2             : // Distributed under the MIT software license, see the accompanying
       3             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       4             : 
       5             : #if defined(ENABLE_ARM_AES)
       6             : #include <crypto/x11/sph_shavite.h>
       7             : #include <crypto/x11/util/util.hpp>
       8             : 
       9             : #include <cstdint>
      10             : #include <cstring>
      11             : 
      12             : #include <arm_neon.h>
      13             : 
      14             : namespace sapphire {
      15             : namespace {
                       // Mixes the four input words r0..r3 into l0..l3 using four round keys.
                       //
                       // r0..r3 are packed into one 128-bit vector with util::pack_le and XORed
                       // with rk_words[0]. util::aes_round_nk is then applied four times, with
                       // rk_words[1], rk_words[2] and rk_words[3] XORed in after the first,
                       // second and third application; nothing is XORed in after the fourth.
                       // The four 32-bit lanes of the result are XORed into l0, l1, l2 and l3.
                       //
                       // l0..l3   : updated in place (lane i is XORed into the i-th reference).
                       // r0..r3   : input words, taken by value.
                       // rk_words : must point to at least four readable uint8x16_t values;
                       //            only indices 0..3 are read.
                       //
                       // NOTE(review): util::pack_le, util::Xor and util::aes_round_nk are not
                       // defined in this file (it includes crypto/x11/util/util.hpp). By name,
                       // aes_round_nk is presumably an AES round without a key addition and
                       // pack_le a little-endian lane pack -- confirm against that header.
      16   173844635 : void CompressElement(uint32_t& l0, uint32_t& l1, uint32_t& l2, uint32_t& l3,
      17             :                      uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint8x16_t* rk_words)
      18             : {
      19   173844635 :     uint8x16_t block = util::pack_le(r0, r1, r2, r3);
                           // First round key is XORed in before any round is applied.
      20   173844635 :     block = util::Xor(block, rk_words[0]);
                           // Three rounds, each followed by XOR with the next round key.
      21   173844635 :     block = util::Xor(util::aes_round_nk(block), rk_words[1]);
      22   173844635 :     block = util::Xor(util::aes_round_nk(block), rk_words[2]);
      23   173844635 :     block = util::Xor(util::aes_round_nk(block), rk_words[3]);
                           // Fourth round: no round key follows it.
      24   173844635 :     block = util::aes_round_nk(block);
                           // View the 16 bytes as four 32-bit lanes and fold them into the outputs.
      25   173844635 :     uint32x4_t result = vreinterpretq_u32_u8(block);
      26   173844635 :     l0 ^= vgetq_lane_u32(result, 0);
      27   173844635 :     l1 ^= vgetq_lane_u32(result, 1);
      28   173844635 :     l2 ^= vgetq_lane_u32(result, 2);
      29   173844635 :     l3 ^= vgetq_lane_u32(result, 3);
      30   173844635 : }
      31             : } // anonymous namespace
      32             : 
      33             : namespace arm_crypto_shavite {
                       // Applies one compression step to the hashing context.
                       //
                       // Builds a 448-word round-key schedule from the 128-byte message block
                       // and the four counter words of the context, runs 14 rounds over a
                       // working copy of sc->h[0..15], and XORs the resulting working state
                       // back into sc->h[0..15].
                       //
                       // sc  : context; h[0..15] is read and updated, count0..count3 are only
                       //       read by this function.
                       // msg : message block; exactly 128 bytes are read from it.
                       //
                       // NOTE(review): the non-little-endian path decodes the block with
                       // sph_dec32le_aligned, whose name suggests msg is expected to be suitably
                       // aligned for 32-bit access -- confirm against the callers.
      34     6210149 : void Compress(sph_shavite_big_context *sc, const void *msg)
      35             : {
                           // Working state: sixteen 32-bit words, loaded from sc->h further down.
      36             :     uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
      37             :     uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
      38             : 
                           // rk       : the round-key schedule as 448 scalar 32-bit words.
                           // rk_words : the same schedule as 112 (= 448/4) 128-bit vectors; it is
                           //            filled only after the scalar schedule is complete.
      39             :     alignas(16) uint8x16_t rk_words[448/4];
      40             :     alignas(16) uint32_t rk[448];
      41             : 
                           // Words 0..31 of the schedule are the message block itself, read as
                           // 32 little-endian 32-bit words.
                           // NOTE(review): the recorded hit counts are on the #else branch, i.e.
                           // SPH_LITTLE_ENDIAN was false or undefined in the instrumented build --
                           // worth confirming that this is intended for the ARM targets.
      42             : #if SPH_LITTLE_ENDIAN
      43             :     memcpy(rk, msg, 128);
      44             : #else
      45    55887961 :     for (size_t u{0}; u < 32; u += 4) {
      46    49677812 :         rk[u + 0] = sph_dec32le_aligned(
      47    49677812 :             (const unsigned char *)msg + (u << 2) +  0);
      48    49677812 :         rk[u + 1] = sph_dec32le_aligned(
      49    49677812 :             (const unsigned char *)msg + (u << 2) +  4);
      50    49677812 :         rk[u + 2] = sph_dec32le_aligned(
      51    49677812 :             (const unsigned char *)msg + (u << 2) +  8);
      52    49677812 :         rk[u + 3] = sph_dec32le_aligned(
      53    49677812 :             (const unsigned char *)msg + (u << 2) + 12);
      54    49677812 :     }
      55             : #endif
      56             : 
                           // Expand words 32..447. Each pass of the outer loop first produces 32
                           // words with the AES-based step and then, unless the schedule is already
                           // complete, 32 more words with the XOR-only step. u is the index of the
                           // next word to write and always advances in groups of four.
      57     6210149 :     size_t u{32};
      58    43467933 :     for (;;) {
                               // AES-based segment: 4 iterations x 2 groups x 4 words = 32 words.
      59   217298909 :         for (int s{0}; s < 4; s++) {
                                   // Input: the 4-word group 32 words back, rotated by one word.
      60   173830976 :             uint32_t x0 = rk[u - 31];
      61   173830976 :             uint32_t x1 = rk[u - 30];
      62   173830976 :             uint32_t x2 = rk[u - 29];
      63   173830976 :             uint32_t x3 = rk[u - 32];
      64             : 
                                   // New group = aes_round_nk(input) XOR the immediately preceding group.
      65   173830976 :             uint32x4_t block = vreinterpretq_u32_u8(util::aes_round_nk(util::pack_le(x0, x1, x2, x3)));
      66   173830976 :             rk[u + 0] = vgetq_lane_u32(block, 0) ^ rk[u - 4];
      67   173830976 :             rk[u + 1] = vgetq_lane_u32(block, 1) ^ rk[u - 3];
      68   173830976 :             rk[u + 2] = vgetq_lane_u32(block, 2) ^ rk[u - 2];
      69   173830976 :             rk[u + 3] = vgetq_lane_u32(block, 3) ^ rk[u - 1];
      70             : 
                                   // The counter words are XORed into the schedule at four fixed groups
                                   // (32, 164, 316 and 440), each time in a different order and with one
                                   // word bitwise-complemented. This first half of the iteration only
                                   // ever writes groups with u % 8 == 0, hence only 32 and 440 are
                                   // tested here.
                                   // NOTE(review): SPH_T32 is defined elsewhere; presumably it truncates
                                   // to 32 bits -- confirm.
      71   173830976 :             if (u == 32) {
      72     6204624 :                 rk[32] ^= sc->count0;
      73     6204624 :                 rk[33] ^= sc->count1;
      74     6204624 :                 rk[34] ^= sc->count2;
      75     6204624 :                 rk[35] ^= SPH_T32(~sc->count3);
      76   173830976 :             } else if (u == 440) {
      77     6210084 :                 rk[440] ^= sc->count1;
      78     6210084 :                 rk[441] ^= sc->count0;
      79     6210084 :                 rk[442] ^= sc->count3;
      80     6210084 :                 rk[443] ^= SPH_T32(~sc->count2);
      81     6210084 :             }
      82   173830976 :             u += 4;
      83             : 
                                   // Second group of the iteration: same construction, one group later.
      84   173830976 :             x0 = rk[u - 31];
      85   173830976 :             x1 = rk[u - 30];
      86   173830976 :             x2 = rk[u - 29];
      87   173830976 :             x3 = rk[u - 32];
      88             : 
      89   173830976 :             block = vreinterpretq_u32_u8(util::aes_round_nk(util::pack_le(x0, x1, x2, x3)));
      90   173830976 :             rk[u + 0] = vgetq_lane_u32(block, 0) ^ rk[u - 4];
      91   173830976 :             rk[u + 1] = vgetq_lane_u32(block, 1) ^ rk[u - 3];
      92   173830976 :             rk[u + 2] = vgetq_lane_u32(block, 2) ^ rk[u - 2];
      93   173830976 :             rk[u + 3] = vgetq_lane_u32(block, 3) ^ rk[u - 1];
      94             : 
                                   // This half writes the groups with u % 8 == 4, which is where the
                                   // remaining two counter injections (164 and 316) fall.
      95   173830976 :             if (u == 164) {
      96     6210061 :                 rk[164] ^= sc->count3;
      97     6210061 :                 rk[165] ^= sc->count2;
      98     6210061 :                 rk[166] ^= sc->count1;
      99     6210061 :                 rk[167] ^= SPH_T32(~sc->count0);
     100   173830976 :             } else if (u == 316) {
     101     6210053 :                 rk[316] ^= sc->count2;
     102     6210053 :                 rk[317] ^= sc->count3;
     103     6210053 :                 rk[318] ^= sc->count0;
     104     6210053 :                 rk[319] ^= SPH_T32(~sc->count1);
     105     6210053 :             }
     106   173830976 :             u += 4;
     107   173830976 :         }
                               // The schedule ends right after an AES-based segment: u reaches 448 here
                               // on the seventh pass of the outer loop.
     108    43467933 :         if (u == 448)
     109     6210149 :             break;
                               // XOR-only segment: 8 groups of 4 words, rk[u + k] = rk[u + k - 32] ^ rk[u + k - 7].
     110   335305734 :         for (int s = 0; s < 8; s++) {
     111   298047950 :             rk[u + 0] = rk[u - 32] ^ rk[u - 7];
     112   298047950 :             rk[u + 1] = rk[u - 31] ^ rk[u - 6];
     113   298047950 :             rk[u + 2] = rk[u - 30] ^ rk[u - 5];
     114   298047950 :             rk[u + 3] = rk[u - 29] ^ rk[u - 4];
     115   298047950 :             u += 4;
                                   // NOTE(review): this break looks unreachable. The segment is only
                                   // entered with u = 64, 128, ..., 384 and adds 32, so u never exceeds
                                   // 416 inside this loop; the recorded hit count of 0 agrees with that.
     116   298047950 :             if (u == 448)
     117           0 :                 break;
     118   298047950 :         }
     119             :     }
     120             : 
                           // Repack the finished scalar schedule into 128-bit vectors, four
                           // consecutive words per vector, for use by CompressElement.
     121   701659130 :     for (int i{0}; i < (448/4); i++) {
     122   695448981 :         rk_words[i] = vreinterpretq_u8_u32(vld1q_u32(&rk[i*4]));
     123   695448981 :     }
     124             : 
                           // Load the working state from the context.
     125     6210149 :     p0 = sc->h[0x0];
     126     6210149 :     p1 = sc->h[0x1];
     127     6210149 :     p2 = sc->h[0x2];
     128     6210149 :     p3 = sc->h[0x3];
     129     6210149 :     p4 = sc->h[0x4];
     130     6210149 :     p5 = sc->h[0x5];
     131     6210149 :     p6 = sc->h[0x6];
     132     6210149 :     p7 = sc->h[0x7];
     133     6210149 :     p8 = sc->h[0x8];
     134     6210149 :     p9 = sc->h[0x9];
     135     6210149 :     pA = sc->h[0xA];
     136     6210149 :     pB = sc->h[0xB];
     137     6210149 :     pC = sc->h[0xC];
     138     6210149 :     pD = sc->h[0xD];
     139     6210149 :     pE = sc->h[0xE];
     140     6210149 :     pF = sc->h[0xF];
     141             : 
                           // 14 rounds. Each round consumes 8 round-key vectors (4 per
                           // CompressElement call), so all 14 * 8 = 112 vectors are used exactly once.
     142     6210149 :     size_t u_words{0};
     143    93142375 :     for (size_t r{0}; r < 14; r++) {
                               // (p0..p3) is updated from (p4..p7), then (p8..pB) from (pC..pF).
     144    86932226 :         CompressElement(p0, p1, p2, p3, p4, p5, p6, p7, &rk_words[u_words]);
     145    86932226 :         u_words += 4;
     146    86932226 :         CompressElement(p8, p9, pA, pB, pC, pD, pE, pF, &rk_words[u_words]);
     147    86932226 :         u_words += 4;
     148             : 
                               // WROT(a, b, c, d) rotates four variables using their old values:
                               // a <- d, b <- a, c <- b, d <- c.
     149             : #define WROT(a, b, c, d)   do { \
     150             :         uint32_t t = d; \
     151             :         d = c; \
     152             :         c = b; \
     153             :         b = a; \
     154             :         a = t; \
     155             :     } while (0)
     156             : 
                               // Applied to each of the four positions, this moves whole 4-word groups:
                               // (p0..p3) <- (pC..pF), (p4..p7) <- (p0..p3),
                               // (p8..pB) <- (p4..p7), (pC..pF) <- (p8..pB).
     157    86932226 :         WROT(p0, p4, p8, pC);
     158    86932226 :         WROT(p1, p5, p9, pD);
     159    86932226 :         WROT(p2, p6, pA, pE);
     160    86932226 :         WROT(p3, p7, pB, pF);
     161             : 
     162             : #undef WROT
     163    86932226 :     }
     164             : 
                           // XOR the final working state into the state stored in the context.
     165     6210149 :     sc->h[0x0] ^= p0;
     166     6210149 :     sc->h[0x1] ^= p1;
     167     6210149 :     sc->h[0x2] ^= p2;
     168     6210149 :     sc->h[0x3] ^= p3;
     169     6210149 :     sc->h[0x4] ^= p4;
     170     6210149 :     sc->h[0x5] ^= p5;
     171     6210149 :     sc->h[0x6] ^= p6;
     172     6210149 :     sc->h[0x7] ^= p7;
     173     6210149 :     sc->h[0x8] ^= p8;
     174     6210149 :     sc->h[0x9] ^= p9;
     175     6210149 :     sc->h[0xA] ^= pA;
     176     6210149 :     sc->h[0xB] ^= pB;
     177     6210149 :     sc->h[0xC] ^= pC;
     178     6210149 :     sc->h[0xD] ^= pD;
     179     6210149 :     sc->h[0xE] ^= pE;
     180     6210149 :     sc->h[0xF] ^= pF;
     181     6210149 : }
     182             : } // namespace arm_crypto_shavite
     183             : } // namespace sapphire
     184             : 
     185             : #endif // ENABLE_ARM_AES

Generated by: LCOV version 1.16