#include <sys/param.h>
#include <machine/endian.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <net/dlil.h>
#include <netinet/in.h>
#define _IP_VHL
#include <netinet/ip.h>
#include <netinet/ip_var.h>
/*
 * Fold a 64-bit checksum accumulator ("sum") down to 16 bits: add the
 * four 16-bit lanes, then the two 16-bit halves of that, then fold any
 * remaining carry via ADDCARRY() (from <netinet/ip_var.h>).  Requires
 * locals named exactly "q_util" and "l_util" to be in scope.
 */
#define REDUCE16 { \
	q_util.q = sum; \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1]; \
	ADDCARRY(sum); \
}

/* Overlay of a 32-bit value with its two 16-bit halves. */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* Overlay of a 64-bit value with its 16-bit and 32-bit pieces. */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};

/* Branch-prediction hint: _exp is expected to evaluate false. */
#define PREDICT_FALSE(_exp) __builtin_expect((_exp), 0)

/* Optimized checksum core; two variants selected on native word size. */
static uint16_t in_cksumdata(const void *buf, int len);
#if ULONG_MAX == 0xffffffffUL
/*
 * 32-bit-accumulator variant (built when ULONG_MAX == 0xffffffffUL).
 *
 * Computes the 16-bit one's-complement sum of the mlen bytes at buf.
 * The result is NOT complemented; callers (e.g. in_cksum_hdr_opt below)
 * apply the final ~.  Data is read 16 bits at a time after consuming a
 * leading byte to reach even alignment; a buffer that starts on an odd
 * address yields a byte-swapped partial sum, which is rotated back by
 * 8 bits before folding (tracked via needs_swap).
 */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint32_t sum, partial;
	unsigned int final_acc;
	const uint8_t *data = (const uint8_t *)buf;
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = 0;
	partial = 0;

	/* Consume one leading byte so the 16-bit loads below are aligned. */
	if ((uintptr_t)data & 1) {
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;

	/* Main loop: 32 bytes (sixteen 16-bit loads) per iteration. */
	while (mlen >= 32) {
		__builtin_prefetch(data + 32);
		partial += *(const uint16_t *)(const void *)data;
		partial += *(const uint16_t *)(const void *)(data + 2);
		partial += *(const uint16_t *)(const void *)(data + 4);
		partial += *(const uint16_t *)(const void *)(data + 6);
		partial += *(const uint16_t *)(const void *)(data + 8);
		partial += *(const uint16_t *)(const void *)(data + 10);
		partial += *(const uint16_t *)(const void *)(data + 12);
		partial += *(const uint16_t *)(const void *)(data + 14);
		partial += *(const uint16_t *)(const void *)(data + 16);
		partial += *(const uint16_t *)(const void *)(data + 18);
		partial += *(const uint16_t *)(const void *)(data + 20);
		partial += *(const uint16_t *)(const void *)(data + 22);
		partial += *(const uint16_t *)(const void *)(data + 24);
		partial += *(const uint16_t *)(const void *)(data + 26);
		partial += *(const uint16_t *)(const void *)(data + 28);
		partial += *(const uint16_t *)(const void *)(data + 30);
		data += 32;
		mlen -= 32;
		/*
		 * Flush "partial" into "sum" before it can overflow:
		 * once the top two bits are set, another iteration of
		 * sixteen 0xffff additions could wrap the 32-bit value.
		 */
		if (PREDICT_FALSE(partial & 0xc0000000)) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 24);
			sum += (partial >> 16);
			sum += (partial & 0xffff);
			partial = 0;
		}
	}
	/* Remainder (mlen < 32): handled in descending power-of-2 chunks. */
	if (mlen & 16) {
		partial += *(const uint16_t *)(const void *)data;
		partial += *(const uint16_t *)(const void *)(data + 2);
		partial += *(const uint16_t *)(const void *)(data + 4);
		partial += *(const uint16_t *)(const void *)(data + 6);
		partial += *(const uint16_t *)(const void *)(data + 8);
		partial += *(const uint16_t *)(const void *)(data + 10);
		partial += *(const uint16_t *)(const void *)(data + 12);
		partial += *(const uint16_t *)(const void *)(data + 14);
		data += 16;
		mlen -= 16;
	}
	if (mlen & 8) {
		partial += *(const uint16_t *)(const void *)data;
		partial += *(const uint16_t *)(const void *)(data + 2);
		partial += *(const uint16_t *)(const void *)(data + 4);
		partial += *(const uint16_t *)(const void *)(data + 6);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(const uint16_t *)(const void *)data;
		partial += *(const uint16_t *)(const void *)(data + 2);
		data += 4;
	}
	if (mlen & 2) {
		partial += *(const uint16_t *)(const void *)data;
		data += 2;
	}
	/* Trailing odd byte, if any. */
	if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		/* Toggle kept for symmetry; not read again in this function. */
		started_on_odd = !started_on_odd;
	}

	/* Undo the byte swap caused by an odd starting address. */
	if (needs_swap)
		partial = (partial << 8) + (partial >> 24);
	/* Fold the 32-bit accumulator down to a 16-bit result. */
	sum += (partial >> 16) + (partial & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (final_acc);
}
#else
/*
 * 64-bit-accumulator variant (built when ULONG_MAX != 0xffffffffUL).
 *
 * Same contract as the 32-bit variant: returns the 16-bit
 * one's-complement sum (NOT complemented) of the mlen bytes at buf.
 * After consuming a leading odd byte and, if needed, one 16-bit word to
 * reach 4-byte alignment, data is read 32 bits at a time into a 64-bit
 * accumulator.  An odd starting address byte-swaps the partial sum,
 * which is rotated back by 8 bits before folding (needs_swap).
 */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint64_t sum, partial;
	unsigned int final_acc;
	const uint8_t *data = (const uint8_t *)buf;
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = 0;
	partial = 0;

	/* Consume one leading byte so 16-bit loads are aligned. */
	if ((uintptr_t)data & 1) {
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;
	/* Consume one 16-bit word so the 32-bit loads below are aligned. */
	if ((uintptr_t)data & 2) {
		if (mlen < 2)
			goto trailing_bytes;
		partial += *(const uint16_t *)(const void *)data;
		data += 2;
		mlen -= 2;
	}
	/* Main loop: 64 bytes (sixteen 32-bit loads) per iteration. */
	while (mlen >= 64) {
		__builtin_prefetch(data + 32);
		__builtin_prefetch(data + 64);
		partial += *(const uint32_t *)(const void *)data;
		partial += *(const uint32_t *)(const void *)(data + 4);
		partial += *(const uint32_t *)(const void *)(data + 8);
		partial += *(const uint32_t *)(const void *)(data + 12);
		partial += *(const uint32_t *)(const void *)(data + 16);
		partial += *(const uint32_t *)(const void *)(data + 20);
		partial += *(const uint32_t *)(const void *)(data + 24);
		partial += *(const uint32_t *)(const void *)(data + 28);
		partial += *(const uint32_t *)(const void *)(data + 32);
		partial += *(const uint32_t *)(const void *)(data + 36);
		partial += *(const uint32_t *)(const void *)(data + 40);
		partial += *(const uint32_t *)(const void *)(data + 44);
		partial += *(const uint32_t *)(const void *)(data + 48);
		partial += *(const uint32_t *)(const void *)(data + 52);
		partial += *(const uint32_t *)(const void *)(data + 56);
		partial += *(const uint32_t *)(const void *)(data + 60);
		data += 64;
		mlen -= 64;
		/*
		 * Flush "partial" into "sum" before it can overflow:
		 * once the top two bits are set, another iteration of
		 * sixteen 0xffffffff additions could wrap 64 bits.
		 */
		if (PREDICT_FALSE(partial & (3ULL << 62))) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 56);
			sum += (partial >> 32);
			sum += (partial & 0xffffffff);
			partial = 0;
		}
	}
	/* Remainder (mlen < 64): handled in descending power-of-2 chunks. */
	if (mlen & 32) {
		partial += *(const uint32_t *)(const void *)data;
		partial += *(const uint32_t *)(const void *)(data + 4);
		partial += *(const uint32_t *)(const void *)(data + 8);
		partial += *(const uint32_t *)(const void *)(data + 12);
		partial += *(const uint32_t *)(const void *)(data + 16);
		partial += *(const uint32_t *)(const void *)(data + 20);
		partial += *(const uint32_t *)(const void *)(data + 24);
		partial += *(const uint32_t *)(const void *)(data + 28);
		data += 32;
	}
	if (mlen & 16) {
		partial += *(const uint32_t *)(const void *)data;
		partial += *(const uint32_t *)(const void *)(data + 4);
		partial += *(const uint32_t *)(const void *)(data + 8);
		partial += *(const uint32_t *)(const void *)(data + 12);
		data += 16;
	}
	if (mlen & 8) {
		partial += *(const uint32_t *)(const void *)data;
		partial += *(const uint32_t *)(const void *)(data + 4);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(const uint32_t *)(const void *)data;
		data += 4;
	}
	if (mlen & 2) {
		partial += *(const uint16_t *)(const void *)data;
		data += 2;
	}
trailing_bytes:
	/* Trailing odd byte, if any. */
	if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		/* Toggle kept for symmetry; not read again in this function. */
		started_on_odd = !started_on_odd;
	}

	/* Undo the byte swap caused by an odd starting address. */
	if (needs_swap)
		partial = (partial << 8) + (partial >> 56);
	/* Fold the 64-bit accumulator down to a 16-bit result. */
	sum += (partial >> 32) + (partial & 0xffffffff);
	sum = (sum >> 32) + (sum & 0xffffffff);
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (final_acc);
}
#endif
/*
 * Exported entry point: 16-bit one's-complement sum (not complemented)
 * of the "len" bytes at "buf", via the optimized core above.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	uint16_t sum;

	sum = in_cksumdata(buf, len);
	return (sum);
}
uint16_t inet_cksum_simple(struct mbuf *, int);
/*
 * Checksum "len" bytes of mbuf chain "m" from offset 0, with no
 * pseudo-header (protocol 0).
 */
uint16_t
inet_cksum_simple(struct mbuf *m, int len)
{
	uint16_t cksum;

	cksum = inet_cksum(m, 0, 0, len);
	return (cksum);
}
/*
 * One's-complement addition of two 16-bit values: add, then fold the
 * end-around carry back in via ADDCARRY().
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t sum;

	sum = (uint64_t)a + (uint64_t)b;
	ADDCARRY(sum);
	return (sum);
}
/*
 * Fold three 32-bit pseudo-header words (e.g. source address,
 * destination address, length+protocol) into a 16-bit partial
 * checksum.  The locals must be named q_util/l_util for REDUCE16.
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	union q_util q_util;
	union l_util l_util;
	uint64_t sum;

	sum = (uint64_t)a;
	sum += b;
	sum += c;
	REDUCE16;
	return (sum);
}
/*
 * 64-bit counterpart of in_pseudo(): fold three 64-bit values into a
 * 16-bit partial checksum (wrap-around on the intermediate sum is
 * harmless modulo-2^64 arithmetic).  Locals named for REDUCE16.
 */
uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	union q_util q_util;
	union l_util l_util;
	uint64_t sum;

	sum = a;
	sum += b;
	sum += c;
	REDUCE16;
	return (sum);
}
/*
 * Checksum an IPv4 header, including any options; the header length
 * (in bytes) is derived from the ip_vhl field.  Returns the
 * complemented 16-bit checksum.
 */
uint16_t
in_cksum_hdr_opt(const struct ip *ip)
{
	int hlen;

	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	return (~b_sum16(ip, hlen) & 0xffff);
}
/*
 * Software-checksum the IPv4 header of mbuf "m"; "out" selects whether
 * the send- or receive-side software-checksum statistics are bumped.
 * The fast in_cksum_hdr() path is used only for an option-less header
 * that is contiguous in the first mbuf and suitably aligned.
 */
uint16_t
ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
{
	struct ip *ip = mtod(m, struct ip *);

	if (out) {
		ipstat.ips_snd_swcsum++;
		ipstat.ips_snd_swcsum_bytes += hlen;
	} else {
		ipstat.ips_rcv_swcsum++;
		ipstat.ips_rcv_swcsum_bytes += hlen;
	}

	/* Slow path: options present, header split across mbufs, or
	 * misaligned. */
	if (hlen != sizeof (*ip) || m->m_len < sizeof (*ip) ||
	    !IP_HDR_ALIGNED_P(ip))
		return (inet_cksum(m, 0, 0, hlen));

	return (in_cksum_hdr(ip));
}
/*
 * Compute the Internet checksum (RFC 1071) over "len" bytes of mbuf
 * chain "m" starting at offset "off".  When "nxt" is non-zero it is
 * taken as the protocol number and an IPv4 pseudo-header (source,
 * destination, length+protocol) built from the leading IP header is
 * folded in as well.  Returns the complemented 16-bit checksum.
 */
uint16_t
inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
{
	uint32_t sum;

	sum = m_sum16(m, off, len);

	/* Include pseudo-header checksum if a protocol was specified. */
	if (nxt != 0) {
		struct ip *ip;
		unsigned char buf[sizeof (*ip)] __attribute__((aligned(8)));
		uint32_t mlen;

		/*
		 * The chain must hold at least a full IPv4 header,
		 * since the pseudo-header is built from its fields.
		 */
		if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) {
			/* mlen is uint32_t: %u (was a mismatched %d) */
			panic("%s: mbuf %p too short (%u) for IPv4 header",
			    __func__, m, mlen);
			/* NOTREACHED */
		}

		/*
		 * If the header is not contiguous in the first mbuf or
		 * not properly aligned, copy it to an aligned local
		 * buffer before dereferencing its fields.
		 */
		if ((sizeof (*ip) > m->m_len) ||
		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
			m_copydata(m, 0, sizeof (*ip), (caddr_t)buf);
			ip = (struct ip *)(void *)buf;
		} else {
			ip = (struct ip *)(void *)(m->m_data);
		}

		/* Pseudo-header: addresses plus network-order (len + proto). */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));
		ADDCARRY(sum);
	}
	return (~sum & 0xffff);
}