meson: target
[qemu.git] / target / arm / crypto_helper.c
1 /*
2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
3 *
4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 */
11
12 #include "qemu/osdep.h"
13
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "vec_internal.h"
19
/*
 * 128-bit scratch value.  The same 16 bytes of storage can be viewed as
 * two 64-bit lanes, four 32-bit words or sixteen individual bytes.
 */
union CRYPTO_STATE {
    uint64_t l[2];
    uint32_t words[4];
    uint8_t  bytes[16];
};
25
/*
 * Element accessors for a CRYPTO_STATE value.  Without
 * HOST_WORDS_BIGENDIAN the index is used directly; with it, the byte
 * index is mirrored within each 8-byte half and the word index is
 * swapped within each pair of words.
 */
#ifndef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)    ((state).bytes[i])
#define CR_ST_WORD(state, i)    ((state).words[i])
#else
#define CR_ST_BYTE(state, i)    ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)    ((state).words[(3 - (i)) ^ 2])
#endif
33
/*
 * Used by helpers whose callers have not been converted to full gvec
 * and which therefore modify only the low 16 bytes of the vector
 * register.  Asserts that the operation size encoded in desc is 16 and
 * passes the operation and maximum sizes on to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, maxsz);
}
46
47 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
48 uint64_t *rm, bool decrypt)
49 {
50 static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
51 static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
52 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
53 union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
54 int i;
55
56 /* xor state vector with round key */
57 rk.l[0] ^= st.l[0];
58 rk.l[1] ^= st.l[1];
59
60 /* combine ShiftRows operation and sbox substitution */
61 for (i = 0; i < 16; i++) {
62 CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
63 }
64
65 rd[0] = st.l[0];
66 rd[1] = st.l[1];
67 }
68
69 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
70 {
71 intptr_t i, opr_sz = simd_oprsz(desc);
72 bool decrypt = simd_data(desc);
73
74 for (i = 0; i < opr_sz; i += 16) {
75 do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
76 }
77 clear_tail(vd, opr_sz, simd_maxsz(desc));
78 }
79
80 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
81 {
82 static uint32_t const mc[][256] = { {
83 /* MixColumns lookup table */
84 0x00000000, 0x03010102, 0x06020204, 0x05030306,
85 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
86 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
87 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
88 0x30101020, 0x33111122, 0x36121224, 0x35131326,
89 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
90 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
91 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
92 0x60202040, 0x63212142, 0x66222244, 0x65232346,
93 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
94 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
95 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
96 0x50303060, 0x53313162, 0x56323264, 0x55333366,
97 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
98 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
99 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
100 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
101 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
102 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
103 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
104 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
105 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
106 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
107 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
108 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
109 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
110 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
111 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
112 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
113 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
114 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
115 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
116 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
117 0x97848413, 0x94858511, 0x91868617, 0x92878715,
118 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
119 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
120 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
121 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
122 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
123 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
124 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
125 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
126 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
127 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
128 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
129 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
130 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
131 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
132 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
133 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
134 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
135 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
136 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
137 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
138 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
139 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
140 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
141 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
142 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
143 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
144 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
145 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
146 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
147 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
148 }, {
149 /* Inverse MixColumns lookup table */
150 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
151 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
152 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
153 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
154 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
155 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
156 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
157 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
158 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
159 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
160 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
161 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
162 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
163 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
164 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
165 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
166 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
167 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
168 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
169 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
170 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
171 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
172 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
173 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
174 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
175 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
176 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
177 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
178 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
179 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
180 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
181 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
182 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
183 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
184 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
185 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
186 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
187 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
188 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
189 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
190 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
191 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
192 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
193 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
194 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
195 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
196 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
197 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
198 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
199 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
200 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
201 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
202 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
203 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
204 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
205 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
206 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
207 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
208 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
209 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
210 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
211 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
212 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
213 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
214 } };
215
216 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
217 int i;
218
219 for (i = 0; i < 16; i += 4) {
220 CR_ST_WORD(st, i >> 2) =
221 mc[decrypt][CR_ST_BYTE(st, i)] ^
222 rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
223 rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
224 rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
225 }
226
227 rd[0] = st.l[0];
228 rd[1] = st.l[1];
229 }
230
231 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
232 {
233 intptr_t i, opr_sz = simd_oprsz(desc);
234 bool decrypt = simd_data(desc);
235
236 for (i = 0; i < opr_sz; i += 16) {
237 do_crypto_aesmc(vd + i, vm + i, decrypt);
238 }
239 clear_tail(vd, opr_sz, simd_maxsz(desc));
240 }
241
242 /*
243 * SHA-1 logical functions
244 */
245
/* Bitwise choose: each result bit comes from y where x is 1, else from z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t differing = y ^ z;

    return z ^ (x & differing);
}
250
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
255
/* Bitwise majority: a result bit is set when at least two inputs have it. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t both = x & y;
    uint32_t either = x | y;

    return both | (either & z);
}
260
261 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
262 {
263 uint64_t *d = vd, *n = vn, *m = vm;
264 uint64_t d0, d1;
265
266 d0 = d[1] ^ d[0] ^ m[0];
267 d1 = n[0] ^ d[1] ^ m[1];
268 d[0] = d0;
269 d[1] = d1;
270
271 clear_tail_16(vd, desc);
272 }
273
274 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
275 uint64_t *rm, uint32_t desc,
276 uint32_t (*fn)(union CRYPTO_STATE *d))
277 {
278 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
279 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
280 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
281 int i;
282
283 for (i = 0; i < 4; i++) {
284 uint32_t t = fn(&d);
285
286 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
287 + CR_ST_WORD(m, i);
288
289 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
290 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
291 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
292 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
293 CR_ST_WORD(d, 0) = t;
294 }
295 rd[0] = d.l[0];
296 rd[1] = d.l[1];
297
298 clear_tail_16(rd, desc);
299 }
300
301 static uint32_t do_sha1c(union CRYPTO_STATE *d)
302 {
303 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
304 }
305
306 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
307 {
308 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
309 }
310
311 static uint32_t do_sha1p(union CRYPTO_STATE *d)
312 {
313 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
314 }
315
316 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
317 {
318 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
319 }
320
321 static uint32_t do_sha1m(union CRYPTO_STATE *d)
322 {
323 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
324 }
325
326 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
327 {
328 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
329 }
330
331 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
332 {
333 uint64_t *rd = vd;
334 uint64_t *rm = vm;
335 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
336
337 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
338 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
339
340 rd[0] = m.l[0];
341 rd[1] = m.l[1];
342
343 clear_tail_16(vd, desc);
344 }
345
346 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
347 {
348 uint64_t *rd = vd;
349 uint64_t *rm = vm;
350 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
351 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
352
353 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
354 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
355 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
356 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
357
358 rd[0] = d.l[0];
359 rd[1] = d.l[1];
360
361 clear_tail_16(vd, desc);
362 }
363
364 /*
365 * The SHA-256 logical functions, according to
366 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
367 */
368
/* SHA-256 logical function: XOR of x rotated right by 2, 13 and 22. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
373
/* SHA-256 logical function: XOR of x rotated right by 6, 11 and 25. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
378
/*
 * SHA-256 logical function: XOR of x rotated right by 7, x rotated
 * right by 18, and x shifted right by 3.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
383
/*
 * SHA-256 logical function: XOR of x rotated right by 17, x rotated
 * right by 19, and x shifted right by 10.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
388
389 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
390 {
391 uint64_t *rd = vd;
392 uint64_t *rn = vn;
393 uint64_t *rm = vm;
394 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
395 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
396 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
397 int i;
398
399 for (i = 0; i < 4; i++) {
400 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
401 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
402 + CR_ST_WORD(m, i);
403
404 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
405 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
406 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
407 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
408
409 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
410 + S0(CR_ST_WORD(d, 0));
411
412 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
413 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
414 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
415 CR_ST_WORD(d, 0) = t;
416 }
417
418 rd[0] = d.l[0];
419 rd[1] = d.l[1];
420
421 clear_tail_16(vd, desc);
422 }
423
424 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
425 {
426 uint64_t *rd = vd;
427 uint64_t *rn = vn;
428 uint64_t *rm = vm;
429 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
430 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
431 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
432 int i;
433
434 for (i = 0; i < 4; i++) {
435 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
436 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
437 + CR_ST_WORD(m, i);
438
439 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
440 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
441 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
442 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
443 }
444
445 rd[0] = d.l[0];
446 rd[1] = d.l[1];
447
448 clear_tail_16(vd, desc);
449 }
450
451 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
452 {
453 uint64_t *rd = vd;
454 uint64_t *rm = vm;
455 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
456 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
457
458 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
459 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
460 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
461 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
462
463 rd[0] = d.l[0];
464 rd[1] = d.l[1];
465
466 clear_tail_16(vd, desc);
467 }
468
469 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
470 {
471 uint64_t *rd = vd;
472 uint64_t *rn = vn;
473 uint64_t *rm = vm;
474 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
475 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
476 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
477
478 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
479 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
480 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
481 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
482
483 rd[0] = d.l[0];
484 rd[1] = d.l[1];
485
486 clear_tail_16(vd, desc);
487 }
488
489 /*
490 * The SHA-512 logical functions (same as above but using 64-bit operands)
491 */
492
/* 64-bit bitwise choose: each result bit comes from y where x is 1, else from z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t differing = y ^ z;

    return z ^ (x & differing);
}
497
/* 64-bit bitwise majority: a result bit is set when at least two inputs have it. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t both = x & y;
    uint64_t either = x | y;

    return both | (either & z);
}
502
/* SHA-512 logical function: XOR of x rotated right by 28, 34 and 39. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
507
/* SHA-512 logical function: XOR of x rotated right by 14, 18 and 41. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
512
/*
 * SHA-512 logical function: XOR of x rotated right by 1, x rotated
 * right by 8, and x shifted right by 7.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
517
/*
 * SHA-512 logical function: XOR of x rotated right by 19, x rotated
 * right by 61, and x shifted right by 6.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
522
523 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
524 {
525 uint64_t *rd = vd;
526 uint64_t *rn = vn;
527 uint64_t *rm = vm;
528 uint64_t d0 = rd[0];
529 uint64_t d1 = rd[1];
530
531 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
532 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
533
534 rd[0] = d0;
535 rd[1] = d1;
536
537 clear_tail_16(vd, desc);
538 }
539
540 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
541 {
542 uint64_t *rd = vd;
543 uint64_t *rn = vn;
544 uint64_t *rm = vm;
545 uint64_t d0 = rd[0];
546 uint64_t d1 = rd[1];
547
548 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
549 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
550
551 rd[0] = d0;
552 rd[1] = d1;
553
554 clear_tail_16(vd, desc);
555 }
556
557 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
558 {
559 uint64_t *rd = vd;
560 uint64_t *rn = vn;
561 uint64_t d0 = rd[0];
562 uint64_t d1 = rd[1];
563
564 d0 += s0_512(rd[1]);
565 d1 += s0_512(rn[0]);
566
567 rd[0] = d0;
568 rd[1] = d1;
569
570 clear_tail_16(vd, desc);
571 }
572
573 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
574 {
575 uint64_t *rd = vd;
576 uint64_t *rn = vn;
577 uint64_t *rm = vm;
578
579 rd[0] += s1_512(rn[0]) + rm[0];
580 rd[1] += s1_512(rn[1]) + rm[1];
581
582 clear_tail_16(vd, desc);
583 }
584
585 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
586 {
587 uint64_t *rd = vd;
588 uint64_t *rn = vn;
589 uint64_t *rm = vm;
590 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
591 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
592 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
593 uint32_t t;
594
595 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
596 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
597
598 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
599 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
600
601 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
602 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
603
604 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
605 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
606
607 rd[0] = d.l[0];
608 rd[1] = d.l[1];
609
610 clear_tail_16(vd, desc);
611 }
612
613 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
614 {
615 uint64_t *rd = vd;
616 uint64_t *rn = vn;
617 uint64_t *rm = vm;
618 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
619 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
620 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
621 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
622
623 CR_ST_WORD(d, 0) ^= t;
624 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
625 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
626 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
627 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
628
629 rd[0] = d.l[0];
630 rd[1] = d.l[1];
631
632 clear_tail_16(vd, desc);
633 }
634
635 static inline void QEMU_ALWAYS_INLINE
636 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
637 uint32_t desc, uint32_t opcode)
638 {
639 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
640 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
641 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
642 uint32_t imm2 = simd_data(desc);
643 uint32_t t;
644
645 assert(imm2 < 4);
646
647 if (opcode == 0 || opcode == 2) {
648 /* SM3TT1A, SM3TT2A */
649 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
650 } else if (opcode == 1) {
651 /* SM3TT1B */
652 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
653 } else if (opcode == 3) {
654 /* SM3TT2B */
655 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
656 } else {
657 qemu_build_not_reached();
658 }
659
660 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
661
662 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
663
664 if (opcode < 2) {
665 /* SM3TT1A, SM3TT1B */
666 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
667
668 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
669 } else {
670 /* SM3TT2A, SM3TT2B */
671 t += CR_ST_WORD(n, 3);
672 t ^= rol32(t, 9) ^ rol32(t, 17);
673
674 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
675 }
676
677 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
678 CR_ST_WORD(d, 3) = t;
679
680 rd[0] = d.l[0];
681 rd[1] = d.l[1];
682
683 clear_tail_16(rd, desc);
684 }
685
686 #define DO_SM3TT(NAME, OPCODE) \
687 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
688 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
689
690 DO_SM3TT(crypto_sm3tt1a, 0)
691 DO_SM3TT(crypto_sm3tt1b, 1)
692 DO_SM3TT(crypto_sm3tt2a, 2)
693 DO_SM3TT(crypto_sm3tt2b, 3)
694
695 #undef DO_SM3TT
696
/* 256-entry byte substitution table used by the SM4 helpers. */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
731
732 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
733 {
734 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
735 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
736 uint32_t t, i;
737
738 for (i = 0; i < 4; i++) {
739 t = CR_ST_WORD(d, (i + 1) % 4) ^
740 CR_ST_WORD(d, (i + 2) % 4) ^
741 CR_ST_WORD(d, (i + 3) % 4) ^
742 CR_ST_WORD(n, i);
743
744 t = sm4_sbox[t & 0xff] |
745 sm4_sbox[(t >> 8) & 0xff] << 8 |
746 sm4_sbox[(t >> 16) & 0xff] << 16 |
747 sm4_sbox[(t >> 24) & 0xff] << 24;
748
749 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
750 rol32(t, 24);
751 }
752
753 rd[0] = d.l[0];
754 rd[1] = d.l[1];
755 }
756
757 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
758 {
759 intptr_t i, opr_sz = simd_oprsz(desc);
760
761 for (i = 0; i < opr_sz; i += 16) {
762 do_crypto_sm4e(vd + i, vn + i, vm + i);
763 }
764 clear_tail(vd, opr_sz, simd_maxsz(desc));
765 }
766
767 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
768 {
769 union CRYPTO_STATE d;
770 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
771 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
772 uint32_t t, i;
773
774 d = n;
775 for (i = 0; i < 4; i++) {
776 t = CR_ST_WORD(d, (i + 1) % 4) ^
777 CR_ST_WORD(d, (i + 2) % 4) ^
778 CR_ST_WORD(d, (i + 3) % 4) ^
779 CR_ST_WORD(m, i);
780
781 t = sm4_sbox[t & 0xff] |
782 sm4_sbox[(t >> 8) & 0xff] << 8 |
783 sm4_sbox[(t >> 16) & 0xff] << 16 |
784 sm4_sbox[(t >> 24) & 0xff] << 24;
785
786 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
787 }
788
789 rd[0] = d.l[0];
790 rd[1] = d.l[1];
791 }
792
793 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
794 {
795 intptr_t i, opr_sz = simd_oprsz(desc);
796
797 for (i = 0; i < opr_sz; i += 16) {
798 do_crypto_sm4ekey(vd + i, vn + i, vm + i);
799 }
800 clear_tail(vd, opr_sz, simd_maxsz(desc));
801 }
802
803 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
804 {
805 intptr_t i, opr_sz = simd_oprsz(desc);
806 uint64_t *d = vd, *n = vn, *m = vm;
807
808 for (i = 0; i < opr_sz / 8; ++i) {
809 d[i] = n[i] ^ rol64(m[i], 1);
810 }
811 clear_tail(vd, opr_sz, simd_maxsz(desc));
812 }