From 6e1898f7de3743e524e10cdf117fe2b2e13de244 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 5 Aug 2022 15:32:15 +0200
Subject: [PATCH] add chacha20 implementation

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 CMakeLists.txt |   2 +-
 chacha20.c     | 236 +++++++++++++++++++++++++++++++++++++++++++++++++
 chacha20.h     |  13 +++
 siphash.h      |  26 ++----
 utils.h        |  34 +++++++
 5 files changed, 293 insertions(+), 18 deletions(-)
 create mode 100644 chacha20.c
 create mode 100644 chacha20.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 651ac80..4c3f670 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,7 +30,7 @@ ELSE()
 SET(ubus "")
 ENDIF()
 
-ADD_LIBRARY(unet SHARED curve25519.c siphash.c sha512.c fprime.c f25519.c ed25519.c edsign.c auth-data.c)
+ADD_LIBRARY(unet SHARED curve25519.c siphash.c sha512.c fprime.c f25519.c ed25519.c edsign.c auth-data.c chacha20.c)
 TARGET_LINK_LIBRARIES(unet ubox)
 
 ADD_EXECUTABLE(unetd ${SOURCES})
diff --git a/chacha20.c b/chacha20.c
new file mode 100644
index 0000000..71f7eac
--- /dev/null
+++ b/chacha20.c
@@ -0,0 +1,236 @@
+
+/*
+ chacha-merged.c version 20080118
+ D. J. Bernstein
+ Public domain.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "utils.h"
+#include "chacha20.h"
+
+struct chacha_ctx {
+    uint32_t input[16];
+};
+
+#define LOAD32_LE(SRC) get_unaligned_le32(SRC)
+
+#define STORE32_LE(DST, W) store32_le((DST), (W))
+
+static inline void
+store32_le(uint8_t dst[4], uint32_t w)
+{
+    dst[0] = (uint8_t) w; w >>= 8;
+    dst[1] = (uint8_t) w; w >>= 8;
+    dst[2] = (uint8_t) w; w >>= 8;
+    dst[3] = (uint8_t) w;
+}
+
+
+#define ROTL32(X, B) rotl32((X), (B))
+static inline uint32_t
+rotl32(const uint32_t x, const int b)
+{
+    return (x << b) | (x >> (32 - b));
+}
+
+typedef struct chacha_ctx chacha_ctx;
+
+#define U32C(v) (v##U)
+
+#define U32V(v) ((uint32_t)(v) &U32C(0xFFFFFFFF))
+
+#define ROTATE(v, c) (ROTL32(v, c))
+#define XOR(v, w) ((v) ^ (w))
+#define PLUS(v, w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v), 1))
+
+#define QUARTERROUND(a, b, c, d) \
+    a = PLUS(a, b);              \
+    d = ROTATE(XOR(d, a), 16);   \
+    c = PLUS(c, d);              \
+    b = ROTATE(XOR(b, c), 12);   \
+    a = PLUS(a, b);              \
+    d = ROTATE(XOR(d, a), 8);    \
+    c = PLUS(c, d);              \
+    b = ROTATE(XOR(b, c), 7);
+
+static void
+chacha_keysetup(chacha_ctx *ctx, const uint8_t *k)
+{
+    ctx->input[0] = U32C(0x61707865);
+    ctx->input[1] = U32C(0x3320646e);
+    ctx->input[2] = U32C(0x79622d32);
+    ctx->input[3] = U32C(0x6b206574);
+    ctx->input[4] = LOAD32_LE(k + 0);
+    ctx->input[5] = LOAD32_LE(k + 4);
+    ctx->input[6] = LOAD32_LE(k + 8);
+    ctx->input[7] = LOAD32_LE(k + 12);
+    ctx->input[8] = LOAD32_LE(k + 16);
+    ctx->input[9] = LOAD32_LE(k + 20);
+    ctx->input[10] = LOAD32_LE(k + 24);
+    ctx->input[11] = LOAD32_LE(k + 28);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
+{
+    ctx->input[12] = counter == NULL ? 0 : LOAD32_LE(counter + 0);
+    ctx->input[13] = counter == NULL ? 0 : LOAD32_LE(counter + 4);
+    ctx->input[14] = LOAD32_LE(iv + 0);
+    ctx->input[15] = LOAD32_LE(iv + 4);
+}
+
+static void
+chacha20_encrypt_bytes(chacha_ctx *ctx, const uint8_t *m, uint8_t *c,
+                       unsigned long long bytes)
+{
+    uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14,
+        x15;
+    uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14,
+        j15;
+    uint8_t *ctarget = NULL;
+    uint8_t tmp[64];
+    unsigned int i;
+
+    if (!bytes) {
+        return; /* LCOV_EXCL_LINE */
+    }
+    j0 = ctx->input[0];
+    j1 = ctx->input[1];
+    j2 = ctx->input[2];
+    j3 = ctx->input[3];
+    j4 = ctx->input[4];
+    j5 = ctx->input[5];
+    j6 = ctx->input[6];
+    j7 = ctx->input[7];
+    j8 = ctx->input[8];
+    j9 = ctx->input[9];
+    j10 = ctx->input[10];
+    j11 = ctx->input[11];
+    j12 = ctx->input[12];
+    j13 = ctx->input[13];
+    j14 = ctx->input[14];
+    j15 = ctx->input[15];
+
+    for (;;) {
+        if (bytes < 64) {
+            memset(tmp, 0, 64);
+            for (i = 0; i < bytes; ++i) {
+                tmp[i] = m[i];
+            }
+            m = tmp;
+            ctarget = c;
+            c = tmp;
+        }
+        x0 = j0;
+        x1 = j1;
+        x2 = j2;
+        x3 = j3;
+        x4 = j4;
+        x5 = j5;
+        x6 = j6;
+        x7 = j7;
+        x8 = j8;
+        x9 = j9;
+        x10 = j10;
+        x11 = j11;
+        x12 = j12;
+        x13 = j13;
+        x14 = j14;
+        x15 = j15;
+        for (i = 20; i > 0; i -= 2) {
+            QUARTERROUND(x0, x4, x8, x12)
+            QUARTERROUND(x1, x5, x9, x13)
+            QUARTERROUND(x2, x6, x10, x14)
+            QUARTERROUND(x3, x7, x11, x15)
+            QUARTERROUND(x0, x5, x10, x15)
+            QUARTERROUND(x1, x6, x11, x12)
+            QUARTERROUND(x2, x7, x8, x13)
+            QUARTERROUND(x3, x4, x9, x14)
+        }
+        x0 = PLUS(x0, j0);
+        x1 = PLUS(x1, j1);
+        x2 = PLUS(x2, j2);
+        x3 = PLUS(x3, j3);
+        x4 = PLUS(x4, j4);
+        x5 = PLUS(x5, j5);
+        x6 = PLUS(x6, j6);
+        x7 = PLUS(x7, j7);
+        x8 = PLUS(x8, j8);
+        x9 = PLUS(x9, j9);
+        x10 = PLUS(x10, j10);
+        x11 = PLUS(x11, j11);
+        x12 = PLUS(x12, j12);
+        x13 = PLUS(x13, j13);
+        x14 = PLUS(x14, j14);
+        x15 = PLUS(x15, j15);
+
+        x0 = XOR(x0, LOAD32_LE(m + 0));
+        x1 = XOR(x1, LOAD32_LE(m + 4));
+        x2 = XOR(x2, LOAD32_LE(m + 8));
+        x3 = XOR(x3, LOAD32_LE(m + 12));
+        x4 = XOR(x4, LOAD32_LE(m + 16));
+        x5 = XOR(x5, LOAD32_LE(m + 20));
+        x6 = XOR(x6, LOAD32_LE(m + 24));
+        x7 = XOR(x7, LOAD32_LE(m + 28));
+        x8 = XOR(x8, LOAD32_LE(m + 32));
+        x9 = XOR(x9, LOAD32_LE(m + 36));
+        x10 = XOR(x10, LOAD32_LE(m + 40));
+        x11 = XOR(x11, LOAD32_LE(m + 44));
+        x12 = XOR(x12, LOAD32_LE(m + 48));
+        x13 = XOR(x13, LOAD32_LE(m + 52));
+        x14 = XOR(x14, LOAD32_LE(m + 56));
+        x15 = XOR(x15, LOAD32_LE(m + 60));
+
+        j12 = PLUSONE(j12);
+        /* LCOV_EXCL_START */
+        if (!j12) {
+            j13 = PLUSONE(j13);
+        }
+        /* LCOV_EXCL_STOP */
+
+        STORE32_LE(c + 0, x0);
+        STORE32_LE(c + 4, x1);
+        STORE32_LE(c + 8, x2);
+        STORE32_LE(c + 12, x3);
+        STORE32_LE(c + 16, x4);
+        STORE32_LE(c + 20, x5);
+        STORE32_LE(c + 24, x6);
+        STORE32_LE(c + 28, x7);
+        STORE32_LE(c + 32, x8);
+        STORE32_LE(c + 36, x9);
+        STORE32_LE(c + 40, x10);
+        STORE32_LE(c + 44, x11);
+        STORE32_LE(c + 48, x12);
+        STORE32_LE(c + 52, x13);
+        STORE32_LE(c + 56, x14);
+        STORE32_LE(c + 60, x15);
+
+        if (bytes <= 64) {
+            if (bytes < 64) {
+                for (i = 0; i < (unsigned int) bytes; ++i) {
+                    ctarget[i] = c[i]; /* ctarget cannot be NULL */
+                }
+            }
+            ctx->input[12] = j12;
+            ctx->input[13] = j13;
+
+            return;
+        }
+        bytes -= 64;
+        c += 64;
+        m += 64;
+    }
+}
+
+void chacha20_encrypt_msg(void *msg, size_t len, const void *nonce, const void *key)
+{
+    struct chacha_ctx ctx;
+
+    chacha_keysetup(&ctx, key);
+    chacha_ivsetup(&ctx, nonce, NULL);
+    chacha20_encrypt_bytes(&ctx, msg, msg, len);
+}
diff --git a/chacha20.h b/chacha20.h
new file mode 100644
index 0000000..5519ef8
--- /dev/null
+++ b/chacha20.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2022 Felix Fietkau <nbd@nbd.name>
+ */
+#ifndef __UNETD_CHACHA20_H
+#define __UNETD_CHACHA20_H
+
+#define CHACHA20_NONCE_SIZE 8
+#define CHACHA20_KEY_SIZE 32
+
+void chacha20_encrypt_msg(void *msg, size_t len, const void *nonce, const void *key);
+
+#endif
diff --git a/siphash.h b/siphash.h
index ff76d9c..233d33a 100644
--- a/siphash.h
+++ b/siphash.h
@@ -16,29 +16,13 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdbool.h>
-#include <libubox/utils.h>
+#include "utils.h"
 
 #define SIPHASH_ALIGNMENT __alignof__(uint64_t)
 
 typedef struct {
 	uint64_t key[2];
 } siphash_key_t;
 
-static inline uint16_t get_unaligned_le16(const uint8_t *p)
-{
-	return p[0] | p[1] << 8;
-}
-
-static inline uint32_t get_unaligned_le32(const uint8_t *p)
-{
-	return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
-}
-
-static inline uint64_t get_unaligned_le64(const uint8_t *p)
-{
-	return (uint64_t)get_unaligned_le32(p + 4) << 32 |
-	       get_unaligned_le32(p);
-}
-
 static inline bool siphash_key_is_zero(const siphash_key_t *key)
 {
 	return !(key->key[0] | key->key[1]);
@@ -54,4 +38,12 @@ static inline void siphash_to_le64(void *dest, const void *data, size_t len,
 	*(uint64_t *)dest = cpu_to_le64(hash);
 }
 
+static inline void siphash_to_be64(void *dest, const void *data, size_t len,
+				   const siphash_key_t *key)
+{
+	uint64_t hash = siphash(data, len, key);
+
+	*(uint64_t *)dest = cpu_to_be64(hash);
+}
+
 #endif /* _LINUX_SIPHASH_H */
diff --git a/utils.h b/utils.h
index 0770807..ce8943c 100644
--- a/utils.h
+++ b/utils.h
@@ -5,7 +5,9 @@
 #ifndef __UNETD_UTILS_H
 #define __UNETD_UTILS_H
 
+#include <string.h>
 #include <netinet/in.h>
+#include <libubox/utils.h>
 
 struct nl_msg;
 
@@ -84,6 +86,38 @@ static inline void bitmask_set_val(uint32_t *mask, unsigned int i, bool val)
 		bitmask_clear(mask, i);
 }
 
+static inline uint16_t get_unaligned_be16(const uint8_t *p)
+{
+	return p[1] | p[0] << 8;
+}
+
+static inline uint32_t get_unaligned_be32(const uint8_t *p)
+{
+	return p[3] | p[2] << 8 | p[1] << 16 | p[0] << 24;
+}
+
+static inline uint64_t get_unaligned_be64(const uint8_t *p)
+{
+	return (uint64_t)get_unaligned_be32(p) << 32 |
+	       get_unaligned_be32(p + 4);
+}
+
+static inline uint16_t get_unaligned_le16(const uint8_t *p)
+{
+	return p[0] | p[1] << 8;
+}
+
+static inline uint32_t get_unaligned_le32(const uint8_t *p)
+{
+	return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline uint64_t get_unaligned_le64(const uint8_t *p)
+{
+	return (uint64_t)get_unaligned_le32(p + 4) << 32 |
+	       get_unaligned_le32(p);
+}
+
 int rtnl_init(void);
 int rtnl_call(struct nl_msg *msg);
 
-- 
2.30.2