#include <net/flowhash.h>
#include <net/route.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_cache.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <dev/random/randomdev.h>
/*
 * Key describing the local ("source") side of a connection.
 *
 * It is filled in by tcp_cache_hash_src() and is both hashed and compared
 * with memcmp(), so callers zero the whole structure before filling it.
 */
struct tcp_heuristic_key {
union {
/* Network signature written by ifnet_get_netsignature(). */
uint8_t thk_net_signature[IFNET_SIGNATURELEN];
/*
 * Local IP address, stored when the signature lookup returns ENOENT or
 * EINVAL. Shares storage with thk_net_signature.
 */
union {
struct in_addr addr;
struct in6_addr addr6;
} thk_ip;
};
/* AF_INET or AF_INET6, matching the connection's address family. */
sa_family_t thk_family;
};
/*
 * One heuristics entry, keyed by the source side only (struct
 * tcp_heuristic_key). Entries live on a per-bucket singly-linked list and are
 * only touched with that bucket's mutex held.
 */
struct tcp_heuristic {
/* Linkage within the bucket's list. */
SLIST_ENTRY(tcp_heuristic) list;
/* tcp_now at the most recent lookup; used to pick the entry to recycle. */
u_int32_t th_last_access;
/* Key this entry is stored under. */
struct tcp_heuristic_key th_key;
/*
 * Zero-sized marker: everything between th_val_start and th_val_end is
 * cleared with bzero() when an entry is recycled for a new key.
 */
char th_val_start[0];
/* TFO loss counter; incremented (capped at 9) and reset by the helpers below. */
u_int8_t th_tfo_cookie_loss;
/* ECN SYN-loss counter; incremented (capped at 9) and reset by the helpers below. */
u_int8_t th_ecn_loss;
/* Number of aggressive ECN fallbacks (capped at 9); used as a back-off shift. */
u_int8_t th_ecn_aggressive;
/* Count of consecutive TFO refusals made by tcp_heuristic_do_tfo(). */
u_int32_t th_tfo_fallback_trials;
/* tcp_now-based timestamp; TFO is refused while this is still in the future. */
u_int32_t th_tfo_cookie_backoff;
/* tcp_now-based timestamp; ECN is refused while this is still in the future. */
u_int32_t th_ecn_backoff;
/*
 * Flags:
 *  th_tfo_in_backoff          - a TFO back-off deadline has been armed
 *  th_tfo_aggressive_fallback - TFO is refused unconditionally
 *  th_tfo_snd_middlebox_supp  - set by tcp_heuristic_tfo_snd_good()
 *  th_tfo_rcv_middlebox_supp  - set by tcp_heuristic_tfo_rcv_good()
 */
u_int8_t th_tfo_in_backoff:1,
th_tfo_aggressive_fallback:1,
th_tfo_snd_middlebox_supp:1,
th_tfo_rcv_middlebox_supp:1;
/* Zero-sized marker ending the region cleared on recycle. */
char th_val_end[0];
};
/* One hash bucket of the heuristics table: an entry list and its mutex. */
struct tcp_heuristics_head {
/* Entries whose key hashes to this bucket. */
SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;
/* Held while the list or any entry on it is read or modified. */
lck_mtx_t thh_mtx;
};
/*
 * Key of a cookie-cache entry: the source-side key plus the destination
 * address. It is hashed and compared with memcmp(), so tcp_cache_hash()
 * zeroes the whole structure before filling it.
 */
struct tcp_cache_key {
/* AF_INET or AF_INET6, matching the connection's address family. */
sa_family_t tck_family;
/* Source side, filled in by tcp_cache_hash_src(). */
struct tcp_heuristic_key tck_src;
/* Foreign (destination) address of the connection. */
union {
struct in_addr addr;
struct in6_addr addr6;
} tck_dst;
};
/*
 * One cookie-cache entry. Entries live on a per-bucket singly-linked list
 * and are only touched with that bucket's mutex held.
 */
struct tcp_cache {
/* Linkage within the bucket's list. */
SLIST_ENTRY(tcp_cache) list;
/* tcp_now at the most recent lookup; used to pick the entry to recycle. */
u_int32_t tc_last_access;
/* Key this entry is stored under. */
struct tcp_cache_key tc_key;
/* Stored TFO cookie bytes. */
u_int8_t tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
/* Number of valid bytes in tc_tfo_cookie; 0 means no cookie is stored. */
u_int8_t tc_tfo_cookie_len;
};
/* One hash bucket of the cookie cache: an entry list and its mutex. */
struct tcp_cache_head {
/* Entries whose key hashes to this bucket. */
SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;
/* Held while the list or any entry on it is read or modified. */
lck_mtx_t tch_mtx;
};
/* Seed passed to net_flowhash(); set once in tcp_cache_init(). */
static u_int32_t tcp_cache_hash_seed;
/*
 * Number of buckets in both tables. tcp_cache_init() makes this a power of
 * two so that "hash & (tcp_cache_size - 1)" selects a bucket.
 */
size_t tcp_cache_size;
/* Entries allowed per bucket before the oldest one is recycled. */
#define TCP_CACHE_BUCKET_SIZE 5
/* Cookie-cache table: tcp_cache_size buckets. */
static struct tcp_cache_head *tcp_cache;
/* NOTE(review): not referenced by any function visible in this section. */
decl_lck_mtx_data(, tcp_cache_mtx);
/* Lock attributes/groups used for the per-bucket cache mutexes. */
static lck_attr_t *tcp_cache_mtx_attr;
static lck_grp_t *tcp_cache_mtx_grp;
static lck_grp_attr_t *tcp_cache_mtx_grp_attr;
/* Heuristics table: tcp_cache_size buckets. */
static struct tcp_heuristics_head *tcp_heuristics;
/* NOTE(review): not referenced by any function visible in this section. */
decl_lck_mtx_data(, tcp_heuristics_mtx);
/* Lock attributes/groups used for the per-bucket heuristics mutexes. */
static lck_attr_t *tcp_heuristic_mtx_attr;
static lck_grp_t *tcp_heuristic_mtx_grp;
static lck_grp_attr_t *tcp_heuristic_mtx_grp_attr;
/*
 * Base ECN back-off in minutes (runtime-tunable); it is converted to ticks
 * and left-shifted by the loss/aggressive counters when a back-off is armed.
 */
int tcp_ecn_timeout = 60;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
&tcp_ecn_timeout, 0, "Initial minutes to wait before re-trying ECN");
/*
 * Round a up to the next power of two. Values that already are a power of
 * two are returned unchanged; 0 yields 0 (the decrement wraps and the final
 * increment wraps back).
 */
static u_int32_t tcp_cache_roundup2(u_int32_t a)
{
	u_int32_t v = a - 1;
	unsigned int shift;

	/* Smear the highest set bit into every lower position. */
	for (shift = 1; shift < 32; shift <<= 1)
		v |= v >> shift;

	return v + 1;
}
/*
 * Fill in the source-side key for the connection behind inp.
 *
 * The address family is recorded and the network signature of the last
 * outgoing interface is requested. If that request returns ENOENT or EINVAL,
 * the connection's local address is stored instead (it shares storage with
 * the signature). The caller is expected to have zeroed *key beforehand.
 */
static void tcp_cache_hash_src(struct inpcb *inp, struct tcp_heuristic_key *key)
{
	struct ifnet *outifp = inp->inp_last_outifp;
	uint8_t siglen = sizeof(key->thk_net_signature);
	uint16_t sigflags;
	int err;

	if (!(inp->inp_vflag & INP_IPV6)) {
		key->thk_family = AF_INET;
		err = ifnet_get_netsignature(outifp, AF_INET, &siglen, &sigflags,
		    key->thk_net_signature);
		if (err != ENOENT && err != EINVAL)
			return;
		/* No signature available: identify the source by its address. */
		memcpy(&key->thk_ip.addr, &inp->inp_laddr, sizeof(struct in_addr));
		return;
	}

	key->thk_family = AF_INET6;
	err = ifnet_get_netsignature(outifp, AF_INET6, &siglen, &sigflags,
	    key->thk_net_signature);
	if (err != ENOENT && err != EINVAL)
		return;
	/* No signature available: identify the source by its address. */
	memcpy(&key->thk_ip.addr6, &inp->in6p_laddr, sizeof(struct in6_addr));
}
/*
 * Build the full cache key (source side plus destination address) for inp
 * into *key and return the index of the bucket it hashes to.
 *
 * *key is zeroed first so that the later memcmp() on whole keys is not
 * affected by unused bytes.
 */
static u_int16_t tcp_cache_hash(struct inpcb *inp, struct tcp_cache_key *key)
{
	const int is_v6 = (inp->inp_vflag & INP_IPV6) != 0;
	u_int32_t flowhash;

	bzero(key, sizeof(*key));
	tcp_cache_hash_src(inp, &key->tck_src);

	key->tck_family = is_v6 ? AF_INET6 : AF_INET;
	if (is_v6)
		memcpy(&key->tck_dst.addr6, &inp->in6p_faddr,
		    sizeof(struct in6_addr));
	else
		memcpy(&key->tck_dst.addr, &inp->inp_faddr,
		    sizeof(struct in_addr));

	flowhash = net_flowhash(key, sizeof(*key), tcp_cache_hash_seed);

	/* tcp_cache_size is a power of two, so masking selects a bucket. */
	return (flowhash & (tcp_cache_size - 1));
}
static void tcp_cache_unlock(struct tcp_cache_head *head)
{
lck_mtx_unlock(&head->tch_mtx);
}
/*
 * Look up (and optionally create) the cache entry for tp's connection.
 *
 * tp       connection whose source/destination form the key
 * create   non-zero to allocate or recycle an entry when none matches
 * headarg  on success, receives the bucket the entry lives in
 *
 * On success the entry is returned with the bucket mutex HELD; the caller
 * must release it with tcp_cache_unlock(*headarg). On failure (no match and
 * !create, or allocation failure) NULL is returned, the mutex has already
 * been released and *headarg is left untouched.
 */
static struct tcp_cache *tcp_getcache_with_lock(struct tcpcb *tp, int create,
    struct tcp_cache_head **headarg)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_cache *tpcache = NULL;
	struct tcp_cache_head *head;
	struct tcp_cache_key key;
	u_int16_t hash;
	int i = 0;

	/* Locate and lock the bucket. */
	hash = tcp_cache_hash(inp, &key);
	head = &tcp_cache[hash];

	lck_mtx_lock(&head->tch_mtx);

	/* Step 1: search the bucket; i ends up as the entry count on a miss. */
	SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
		if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0)
			break;
		i++;
	}

	/* Step 2: on a miss, create a new entry or recycle the oldest one. */
	if ((tpcache == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_cache *oldest_cache = NULL;
			u_int32_t max_age = 0;

			/*
			 * Pick the least recently used entry. The comparison is
			 * ">=" so that a victim is always found: if every entry
			 * was accessed during the current tick all ages are 0,
			 * and a strict ">" would leave oldest_cache NULL and trip
			 * the VERIFY below.
			 */
			SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
				u_int32_t age = tcp_now - tpcache->tc_last_access;
				if (age >= max_age) {
					max_age = age;
					oldest_cache = tpcache;
				}
			}
			VERIFY(oldest_cache != NULL);

			tpcache = oldest_cache;

			/* The recycled entry must not expose the old cookie. */
			tpcache->tc_tfo_cookie_len = 0;
		} else {
			/* Room left in the bucket: allocate a zeroed entry. */
			tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP,
			    M_NOWAIT | M_ZERO);
			if (tpcache == NULL)
				goto out_null;

			SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
		}

		memcpy(&tpcache->tc_key, &key, sizeof(key));
	}

	if (tpcache == NULL)
		goto out_null;

	/* Step 3: refresh the LRU stamp and hand back the locked bucket. */
	tpcache->tc_last_access = tcp_now;
	*headarg = head;

	return (tpcache);

out_null:
	tcp_cache_unlock(head);
	return (NULL);
}
/*
 * Store a TFO cookie for tp's connection, creating the cache entry if needed.
 *
 * tp      connection the cookie belongs to
 * cookie  cookie bytes to store
 * len     number of bytes at cookie
 *
 * The stored length is clamped to the size of the entry's cookie buffer so
 * that an oversized len can never write past tc_tfo_cookie. Nothing is
 * stored if no entry could be obtained.
 */
void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Lookup/create; returns with the bucket locked on success. */
	tpcache = tcp_getcache_with_lock(tp, 1, &head);
	if (tpcache == NULL)
		return;

	/* Never copy more than the fixed-size buffer can hold. */
	if (len > sizeof(tpcache->tc_tfo_cookie))
		len = (u_int8_t)sizeof(tpcache->tc_tfo_cookie);

	tpcache->tc_tfo_cookie_len = len;
	memcpy(tpcache->tc_tfo_cookie, cookie, len);

	tcp_cache_unlock(head);
}
/*
 * Copy the cached TFO cookie for tp's connection into cookie.
 *
 * tp      connection to look up (an entry is created if none exists)
 * cookie  destination buffer
 * len     in: capacity of cookie; out: number of bytes copied
 *
 * Returns 1 if a cookie was copied, 0 if no entry could be obtained or the
 * entry holds no cookie (in which case *len is left unchanged). The stored
 * cookie must fit in the caller's buffer; this is enforced with VERIFY().
 */
int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len)
{
	struct tcp_cache_head *bucket;
	struct tcp_cache *entry;
	int found = 0;

	/* Lookup/create; returns with the bucket locked on success. */
	entry = tcp_getcache_with_lock(tp, 1, &bucket);
	if (entry == NULL)
		return (0);

	if (entry->tc_tfo_cookie_len != 0) {
		VERIFY(entry->tc_tfo_cookie_len <= *len);
		memcpy(cookie, entry->tc_tfo_cookie, entry->tc_tfo_cookie_len);
		*len = entry->tc_tfo_cookie_len;
		found = 1;
	}

	tcp_cache_unlock(bucket);
	return (found);
}
/*
 * Return the length of the TFO cookie cached for tp's connection, or 0 if
 * no entry could be obtained or no cookie is stored. An entry is created if
 * none exists yet.
 */
unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp)
{
	struct tcp_cache_head *bucket;
	struct tcp_cache *entry;
	unsigned int cookie_len = 0;

	/* Lookup/create; returns with the bucket locked on success. */
	entry = tcp_getcache_with_lock(tp, 1, &bucket);
	if (entry != NULL) {
		cookie_len = entry->tc_tfo_cookie_len;
		tcp_cache_unlock(bucket);
	}

	return cookie_len;
}
/*
 * Build the source-side key for inp into *key and return the index of the
 * heuristics bucket it hashes to. *key is zeroed first so that whole-key
 * memcmp() comparisons are not affected by unused bytes.
 */
static u_int16_t tcp_heuristics_hash(struct inpcb *inp,
    struct tcp_heuristic_key *key)
{
	u_int32_t flowhash;

	bzero(key, sizeof(*key));
	tcp_cache_hash_src(inp, key);

	flowhash = net_flowhash(key, sizeof(*key), tcp_cache_hash_seed);

	/* tcp_cache_size is a power of two, so masking selects a bucket. */
	return (flowhash & (tcp_cache_size - 1));
}
static void tcp_heuristic_unlock(struct tcp_heuristics_head *head)
{
lck_mtx_unlock(&head->thh_mtx);
}
/*
 * Look up (and optionally create) the heuristics entry for tp's source side.
 *
 * tp       connection whose source side forms the key
 * create   non-zero to allocate or recycle an entry when none matches
 * headarg  on success, receives the bucket the entry lives in
 *
 * On success the entry is returned with the bucket mutex HELD; the caller
 * must release it with tcp_heuristic_unlock(*headarg). On failure (no match
 * and !create, or allocation failure) NULL is returned, the mutex has
 * already been released and *headarg is left untouched.
 */
static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
    int create, struct tcp_heuristics_head **headarg)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_heuristic *tpheur = NULL;
	struct tcp_heuristics_head *head;
	struct tcp_heuristic_key key;
	u_int16_t hash;
	int i = 0;

	/* Locate and lock the bucket. */
	hash = tcp_heuristics_hash(inp, &key);
	head = &tcp_heuristics[hash];

	lck_mtx_lock(&head->thh_mtx);

	/* Step 1: search the bucket; i ends up as the entry count on a miss. */
	SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
		if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0)
			break;
		i++;
	}

	/* Step 2: on a miss, create a new entry or recycle the oldest one. */
	if ((tpheur == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_heuristic *oldest_heur = NULL;
			u_int32_t max_age = 0;

			/*
			 * Pick the least recently used entry. The comparison is
			 * ">=" so that a victim is always found: if every entry
			 * was accessed during the current tick all ages are 0,
			 * and a strict ">" would leave oldest_heur NULL and trip
			 * the VERIFY below.
			 */
			SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
				u_int32_t age = tcp_now - tpheur->th_last_access;
				if (age >= max_age) {
					max_age = age;
					oldest_heur = tpheur;
				}
			}
			VERIFY(oldest_heur != NULL);

			tpheur = oldest_heur;

			/* Wipe all heuristic values of the previous owner. */
			bzero(tpheur->th_val_start,
			    tpheur->th_val_end - tpheur->th_val_start);
		} else {
			/* Room left in the bucket: allocate a zeroed entry. */
			tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
			    M_NOWAIT | M_ZERO);
			if (tpheur == NULL)
				goto out_null;

			SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
		}

		/*
		 * Start both back-off deadlines at "now" so a fresh entry is
		 * not considered to be in back-off.
		 */
		tpheur->th_ecn_backoff = tcp_now;
		tpheur->th_tfo_cookie_backoff = tcp_now;

		memcpy(&tpheur->th_key, &key, sizeof(key));
	}

	if (tpheur == NULL)
		goto out_null;

	/* Step 3: refresh the LRU stamp and hand back the locked bucket. */
	tpheur->th_last_access = tcp_now;
	*headarg = head;

	return (tpheur);

out_null:
	tcp_heuristic_unlock(head);
	return (NULL);
}
/*
 * Record a successful TFO exchange for tp's source side by clearing the TFO
 * loss counter. The entry is created if missing; nothing happens if no entry
 * could be obtained.
 */
void tcp_heuristic_tfo_success(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;

	entry = tcp_getheuristic_with_lock(tp, 1, &bucket);
	if (entry != NULL) {
		entry->th_tfo_cookie_loss = 0;
		tcp_heuristic_unlock(bucket);
	}
}
/*
 * Remember that the receive direction is known good for TFO on tp's source
 * side, and set TFO_F_NO_RCVPROBING on the connection. The entry is created
 * if missing; if no entry could be obtained, neither the heuristic nor the
 * connection flag is changed.
 */
void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;

	entry = tcp_getheuristic_with_lock(tp, 1, &bucket);
	if (entry != NULL) {
		entry->th_tfo_rcv_middlebox_supp = 1;
		tcp_heuristic_unlock(bucket);

		tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
	}
}
/*
 * Remember that the send direction is known good for TFO on tp's source
 * side, and set TFO_F_NO_SNDPROBING on the connection. The entry is created
 * if missing; if no entry could be obtained, neither the heuristic nor the
 * connection flag is changed.
 */
void tcp_heuristic_tfo_snd_good(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;

	entry = tcp_getheuristic_with_lock(tp, 1, &bucket);
	if (entry != NULL) {
		entry->th_tfo_snd_middlebox_supp = 1;
		tcp_heuristic_unlock(bucket);

		tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
	}
}
/*
 * Record a loss attributed to TFO and/or ECN for tp's source side.
 *
 * tp   connection that experienced the loss
 * tfo  non-zero to count the loss against TFO
 * ecn  non-zero to count the loss against ECN
 *
 * Both counters saturate at 9. Once the ECN counter reaches
 * ECN_MAX_SYN_LOSS, the fallback statistics are bumped and an ECN back-off
 * deadline is armed: tcp_ecn_timeout minutes, doubled for every loss beyond
 * the threshold. The entry is created if missing; nothing happens if no
 * entry could be obtained.
 */
void tcp_heuristic_inc_loss(struct tcpcb *tp, int tfo, int ecn)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	/* Cap at 9 so the counter (and the shift derived from it) stays small. */
	if (tfo && tpheur->th_tfo_cookie_loss < 9)
		tpheur->th_tfo_cookie_loss++;

	if (ecn && tpheur->th_ecn_loss < 9) {
		tpheur->th_ecn_loss++;
		if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
			tcpstat.tcps_ecn_fallback_synloss++;
			INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_synloss);
			/*
			 * tcp_ecn_timeout is a signed, runtime-tunable int. Do the
			 * minutes-to-ticks conversion and the shift in unsigned
			 * 32-bit arithmetic so a large setting wraps (as the
			 * u_int32_t timestamp does anyway) instead of invoking
			 * signed-overflow undefined behaviour.
			 */
			tpheur->th_ecn_backoff = tcp_now +
			    (((u_int32_t)tcp_ecn_timeout * 60 * TCP_RETRANSHZ)
			    << (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
		}
	}

	tcp_heuristic_unlock(head);
}
/*
 * Mark tp's source side for aggressive TFO fallback; tcp_heuristic_do_tfo()
 * refuses TFO while this flag is set. The entry is created if missing;
 * nothing happens if no entry could be obtained.
 */
void tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;

	entry = tcp_getheuristic_with_lock(tp, 1, &bucket);
	if (entry != NULL) {
		entry->th_tfo_aggressive_fallback = 1;
		tcp_heuristic_unlock(bucket);
	}
}
/*
 * Arm an ECN back-off for tp's source side and escalate it for next time.
 *
 * The deadline is tcp_ecn_timeout minutes, doubled for every previous call
 * recorded in th_ecn_aggressive (which saturates at 9). The entry is created
 * if missing; nothing happens if no entry could be obtained.
 */
void tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	/*
	 * tcp_ecn_timeout is a signed, runtime-tunable int. Do the
	 * minutes-to-ticks conversion and the shift in unsigned 32-bit
	 * arithmetic so a large setting wraps (as the u_int32_t timestamp does
	 * anyway) instead of invoking signed-overflow undefined behaviour.
	 */
	tpheur->th_ecn_backoff = tcp_now +
	    (((u_int32_t)tcp_ecn_timeout * 60 * TCP_RETRANSHZ)
	    << (tpheur->th_ecn_aggressive));

	/* Use the pre-increment value above; cap so the shift stays bounded. */
	if (tpheur->th_ecn_aggressive < 9)
		tpheur->th_ecn_aggressive++;

	tcp_heuristic_unlock(head);
}
/*
 * Clear the TFO and/or ECN loss counters for tp's source side.
 *
 * tp   connection to look up
 * tfo  non-zero to clear the TFO loss counter
 * ecn  non-zero to clear the ECN loss counter
 *
 * Only an existing entry is updated; none is created.
 */
void tcp_heuristic_reset_loss(struct tcpcb *tp, int tfo, int ecn)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;

	entry = tcp_getheuristic_with_lock(tp, 0, &bucket);
	if (entry != NULL) {
		if (tfo)
			entry->th_tfo_cookie_loss = 0;
		if (ecn)
			entry->th_ecn_loss = 0;

		tcp_heuristic_unlock(bucket);
	}
}
/*
 * Decide whether TFO should be attempted on tp.
 *
 * Returns true when no heuristics entry exists (none is created) or when
 * the entry does not call for a fallback. Returns false when aggressive
 * fallback is set, or when the loss counter has reached TFO_MAX_COOKIE_LOSS
 * and either fewer than tcp_tfo_fallback_min refusals have been made or the
 * back-off deadline has not passed yet.
 *
 * Side effects: refusals are counted and, once enough have been made, a
 * back-off deadline is armed (2 minutes at exactly TFO_MAX_COOKIE_LOSS
 * losses, 60 minutes beyond that). When TFO is allowed, the refusal state
 * is reset and known-good probing flags are copied onto tp.
 */
boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;
	boolean_t allow = true;

	/* Lookup only; returns with the bucket locked on success. */
	entry = tcp_getheuristic_with_lock(tp, 0, &bucket);
	if (entry == NULL)
		return (true);

	if (entry->th_tfo_aggressive_fallback) {
		allow = false;
		goto done;
	}

	if (entry->th_tfo_cookie_loss >= TFO_MAX_COOKIE_LOSS &&
	    (entry->th_tfo_fallback_trials < tcp_tfo_fallback_min ||
	    TSTMP_GT(entry->th_tfo_cookie_backoff, tcp_now))) {
		/* Still falling back: count this refusal. */
		entry->th_tfo_fallback_trials++;

		/* Arm the back-off deadline once, after enough refusals. */
		if (entry->th_tfo_fallback_trials >= tcp_tfo_fallback_min &&
		    !entry->th_tfo_in_backoff) {
			entry->th_tfo_cookie_backoff = tcp_now +
			    ((entry->th_tfo_cookie_loss == TFO_MAX_COOKIE_LOSS) ?
			    (60 * 2 * TCP_RETRANSHZ) : (60 * 60 * TCP_RETRANSHZ));
			entry->th_tfo_in_backoff = 1;
		}

		allow = false;
		goto done;
	}

	/* TFO is allowed again: forget the refusal/back-off state. */
	entry->th_tfo_fallback_trials = 0;
	entry->th_tfo_in_backoff = 0;

	if (entry->th_tfo_rcv_middlebox_supp)
		tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
	if (entry->th_tfo_snd_middlebox_supp)
		tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;

done:
	tcp_heuristic_unlock(bucket);
	return (allow);
}
/*
 * Decide whether ECN should be attempted on tp. Returns true when no
 * heuristics entry exists (none is created) or when the entry's ECN
 * back-off deadline is not in the future; false otherwise.
 */
boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp)
{
	struct tcp_heuristics_head *bucket;
	struct tcp_heuristic *entry;
	boolean_t allow;

	/* Lookup only; returns with the bucket locked on success. */
	entry = tcp_getheuristic_with_lock(tp, 0, &bucket);
	if (entry == NULL)
		return true;

	allow = TSTMP_GT(entry->th_ecn_backoff, tcp_now) ? false : true;

	tcp_heuristic_unlock(bucket);
	return (allow);
}
static void sysctl_cleartfocache(void)
{
int i;
for (i = 0; i < tcp_cache_size; i++) {
struct tcp_cache_head *head = &tcp_cache[i];
struct tcp_cache *tpcache, *tmp;
struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
struct tcp_heuristic *tpheur, *htmp;
lck_mtx_lock(&head->tch_mtx);
SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
_FREE(tpcache, M_TEMP);
}
lck_mtx_unlock(&head->tch_mtx);
lck_mtx_lock(&hhead->thh_mtx);
SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
_FREE(tpheur, M_TEMP);
}
lck_mtx_unlock(&hhead->thh_mtx);
}
}
/* Last value written to the clear_tfocache sysctl. */
static int tcpcleartfo = 0;

/*
 * Handler for the clear_tfocache sysctl. Reads report the current value.
 * A write that changes the value flushes both tables via
 * sysctl_cleartfocache(); the written value is stored either way.
 * Returns the error from sysctl_handle_int(), or 0.
 */
static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int previous = tcpcleartfo;
	int requested = previous;
	int error;

	error = sysctl_handle_int(oidp, &requested, 0, req);
	if (error)
		return (error);

	/* Read-only request: nothing to update. */
	if (!req->newptr)
		return (error);

	/* Only an actual change of the value triggers a flush. */
	if (requested != previous)
		sysctl_cleartfocache();

	tcpcleartfo = requested;

	return (error);
}
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
    CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
    "Toggle to clear the TFO destination based heuristic cache");
void tcp_cache_init(void)
{
uint64_t sane_size_meg = sane_size / 1024 / 1024;
int i;
tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2));
if (tcp_cache_size < 32)
tcp_cache_size = 32;
else if (tcp_cache_size > 1024)
tcp_cache_size = 1024;
tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size,
M_TEMP, M_ZERO);
if (tcp_cache == NULL)
panic("Allocating tcp_cache failed at boot-time!");
tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr);
tcp_cache_mtx_attr = lck_attr_alloc_init();
tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
M_TEMP, M_ZERO);
if (tcp_heuristics == NULL)
panic("Allocating tcp_heuristic failed at boot-time!");
tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init();
tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr);
tcp_heuristic_mtx_attr = lck_attr_alloc_init();
for (i = 0; i < tcp_cache_size; i++) {
lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp,
tcp_cache_mtx_attr);
SLIST_INIT(&tcp_cache[i].tcp_caches);
lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp,
tcp_heuristic_mtx_attr);
SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
}
tcp_cache_hash_seed = RandomULong();
}