Skip to content

Commit 16ccc25

Browse files
zx2c4 authored and intel-lab-lkp committed
zinc: ChaCha20 ARM and ARM64 implementations
These NEON and non-NEON implementations come from Andy Polyakov's implementation. They are exactly the same as Andy Polyakov's original, with the following exceptions: - Entries and exits use the proper kernel convention macro. - CPU feature checking is done in C by the glue code, so that has been removed from the assembly. - The function names have been renamed to fit kernel conventions. - Labels have been renamed (prefixed with .L) to fit kernel conventions. - Constants have been rearranged so that they are closer to the code that is using them. [ARM only] - The neon code can jump to the scalar code when it makes sense to do so. - The neon_512 function as a separate function has been removed, leaving the decision up to the main neon entry point. [ARM64 only] After '/^#/d;/^\..*[^:]$/d', the code has the following diff in actual instructions from the original. ARM: -ChaCha20_ctr32: -.LChaCha20_ctr32: +ENTRY(chacha20_arm) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} - sub r14,pc,#16 @ ChaCha20_ctr32 - adr r14,.LChaCha20_ctr32 cmp r2,#0 @ len==0? itt eq addeq sp,sp,#4*3 - beq .Lno_data - cmp r2,#192 @ test len - bls .Lshort - ldr r4,[r14,#-32] - ldr r4,[r14,r4] - ldr r4,[r4] - tst r4,#ARMV7_NEON - bne .LChaCha20_neon + beq .Lno_data_arm .Lshort: ldmia r12,{r4-r7} @ load counter and nonce sub sp,sp,#4*(16) @ off-load area - sub r14,r14,#64 @ .Lsigma + sub r14,pc,#100 @ .Lsigma + adr r14,.Lsigma @ .Lsigma stmdb sp!,{r4-r7} @ copy counter and nonce ldmia r3,{r4-r11} @ load key ldmia r14,{r0-r3} @ load sigma @@ -617,14 +615,25 @@ .Ldone: add sp,sp,#4*(32+3) -.Lno_data: +.Lno_data_arm: ldmia sp!,{r4-r11,pc} +ENDPROC(chacha20_arm) -ChaCha20_neon: +ENTRY(chacha20_neon) ldr r12,[sp,#0] @ pull pointer to counter and nonce stmdb sp!,{r0-r2,r4-r11,lr} -.LChaCha20_neon: - adr r14,.Lsigma + cmp r2,#0 @ len==0?
+ itt eq + addeq sp,sp,#4*3 + beq .Lno_data_neon + cmp r2,#192 @ test len + bls .Lshort +.Lchacha20_neon_begin: + adr r14,.Lsigma2 vstmdb sp!,{d8-d15} @ ABI spec says so stmdb sp!,{r0-r3} @@ -1265,4 +1274,6 @@ add sp,sp,#4*(32+4) vldmia sp,{d8-d15} add sp,sp,#4*(16+3) +.Lno_data_neon: ldmia sp!,{r4-r11,pc} +ENDPROC(chacha20_neon) ARM64: -ChaCha20_ctr32: +ENTRY(chacha20_arm) cbz x2,.Labort - adr x5,.LOPENSSL_armcap_P - cmp x2,#192 - b.lo .Lshort - ldrsw x6,[x5] - ldr x6,[x5] - ldr w17,[x6,x5] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon - .Lshort: stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -279,8 +274,13 @@ ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret +ENDPROC(chacha20_arm) + +ENTRY(chacha20_neon) + cbz x2,.Labort_neon + cmp x2,#192 + b.lo .Lshort -ChaCha20_neon: stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -763,16 +763,6 @@ ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 ret -ChaCha20_512_neon: - stp x29,x30,[sp,#-96]! - add x29,sp,#0 - - adr x5,.Lsigma - stp x19,x20,[sp,#16] - stp x21,x22,[sp,#32] - stp x23,x24,[sp,#48] - stp x25,x26,[sp,#64] - stp x27,x28,[sp,#80] .L512_or_more_neon: sub sp,sp,#128+64 @@ -1920,4 +1910,6 @@ ldp x25,x26,[x29,#64] ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 +.Labort_neon: ret +ENDPROC(chacha20_neon) Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Cc: Samuel Neves <sneves@dei.uc.pt> Cc: Andy Lutomirski <luto@kernel.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> Cc: Andy Polyakov <appro@openssl.org> Cc: Russell King <linux@armlinux.org.uk> Cc: linux-arm-kernel@lists.infradead.org
1 parent 426f167 commit 16ccc25

File tree

5 files changed

+3466
-0
lines changed

5 files changed

+3466
-0
lines changed

lib/zinc/Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,6 @@ ccflags-$(CONFIG_ZINC_DEBUG) += -DDEBUG
55

66
zinc_chacha20-y := chacha20/chacha20.o
77
zinc_chacha20-$(CONFIG_ZINC_ARCH_X86_64) += chacha20/chacha20-x86_64.o
8+
zinc_chacha20-$(CONFIG_ZINC_ARCH_ARM) += chacha20/chacha20-arm.o
9+
zinc_chacha20-$(CONFIG_ZINC_ARCH_ARM64) += chacha20/chacha20-arm64.o
810
obj-$(CONFIG_ZINC_CHACHA20) += zinc_chacha20.o

lib/zinc/chacha20/chacha20-arm-glue.h

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/* SPDX-License-Identifier: MIT
2+
*
3+
* Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4+
*/
5+
6+
#include <asm/hwcap.h>
7+
#include <asm/neon.h>
8+
9+
/*
 * Scalar (non-NEON) ChaCha20 routine; the definition lives in the
 * architecture's assembly object, not in this header.
 *
 * out, in, len: output buffer, input buffer and byte count (presumably len
 *               bytes are processed from in into out -- confirm against the
 *               assembly).
 * key:          eight 32-bit key words.
 * counter:      four 32-bit words; the assembly comments describe them as
 *               "counter and nonce".
 */
asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
10+
const u32 key[8], const u32 counter[4]);
11+
/*
 * The NEON variant is only declared when kernel-mode NEON is enabled and the
 * target is either 64-bit or ARM architecture version 7 or later.
 * ARM_USE_NEON is defined in that case so later code can test for it.
 */
#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && \
12+
(defined(CONFIG_64BIT) || __LINUX_ARM_ARCH__ >= 7)
13+
#define ARM_USE_NEON
14+
/* NEON ChaCha20 routine; takes the same arguments as chacha20_arm(). */
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
15+
const u32 key[8], const u32 counter[4]);
16+
#endif
17+
18+
/*
 * Whether the NEON routine may be selected at run time. Assigned in
 * chacha20_fpu_init() and read in chacha20_arch().
 */
static bool chacha20_use_neon __ro_after_init;
19+
20+
/*
 * Initialise chacha20_use_neon from the CPU capability bits in elf_hwcap.
 *
 * On ARM64 the HWCAP_ASIMD bit is tested; on 32-bit ARM the HWCAP_NEON bit
 * is tested. If neither CONFIG_ARM64 nor CONFIG_ARM is defined, the flag is
 * not assigned here.
 */
static void __init chacha20_fpu_init(void)
21+
{
22+
#if defined(CONFIG_ARM64)
23+
/* Non-zero masked value converts to true when stored in the bool. */
chacha20_use_neon = elf_hwcap & HWCAP_ASIMD;
24+
#elif defined(CONFIG_ARM)
25+
chacha20_use_neon = elf_hwcap & HWCAP_NEON;
26+
#endif
27+
}
28+
29+
/*
 * Architecture entry point: run ChaCha20 over len bytes using either the
 * NEON or the scalar assembly routine.
 *
 * dst, src, len, key, counter are forwarded unchanged to the selected
 * routine. simd_context is passed to simd_use(), whose result gates the NEON
 * path (NOTE(review): simd_use() is defined elsewhere; presumably it reports
 * whether SIMD registers may be used in the current context -- confirm).
 *
 * Always returns true: one of the two routines is always called.
 */
static inline bool chacha20_arch(u8 *dst, const u8 *src, const size_t len,
30+
const u32 key[8], const u32 counter[4],
31+
simd_context_t *simd_context)
32+
{
33+
#if defined(ARM_USE_NEON)
34+
/* NEON path: needs both the boot-time CPU flag and simd_use() to agree. */
if (chacha20_use_neon && simd_use(simd_context)) {
35+
chacha20_neon(dst, src, len, key, counter);
36+
return true;
37+
}
38+
#endif
39+
/* Fallback: scalar routine, also the only path when NEON is compiled out. */
chacha20_arm(dst, src, len, key, counter);
40+
return true;
41+
}
42+
43+
/*
 * Architecture hook for HChaCha20. No ARM-specific implementation is wired
 * up here: none of the arguments are used and the function always returns
 * false (presumably telling the caller to use a generic implementation --
 * confirm against the caller).
 */
static inline bool hchacha20_arch(u8 *derived_key, const u8 *nonce,
44+
const u8 *key, simd_context_t *simd_context)
45+
{
46+
return false;
47+
}

0 commit comments

Comments
 (0)