#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <kern/lock.h>
#include <kern/zalloc.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_mroute.h>
#include <netinet/ip_var.h>
#include <net/if_dl.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>
#include <pexpert/pexpert.h>
#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
#define SA(p) ((struct sockaddr *)(p))
extern void kdp_set_gateway_mac (void *gatewaymac);
extern struct domain routedomain;
struct route_cb route_cb;
__private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 };
struct radix_node_head *rt_tables[AF_MAX+1];
lck_mtx_t *rt_mtx;
lck_attr_t *rt_mtx_attr;
lck_grp_t *rt_mtx_grp;
lck_grp_attr_t *rt_mtx_grp_attr;
lck_mtx_t *route_domain_mtx;
int rttrash = 0;
static unsigned int rte_debug;
#define RTD_DEBUG 0x1
#define RTD_TRACE 0x2
#define RTD_NO_FREE 0x4
static struct zone *rte_zone;
#define RTE_ZONE_MAX 65536
#define RTE_ZONE_NAME "rtentry"
#define RTD_INUSE 0xFEEDFACE
#define RTD_FREED 0xDEADBEEF
#define RTD_TRSTACK_SIZE 8
#define RTD_REFHIST_SIZE 4
/*
 * Debug wrapper around struct rtentry, used when the "rte_debug" boot-arg
 * enables RTD_DEBUG.  route_init() then sizes rte_zone for this structure
 * instead of the plain rtentry; the extra fields record allocation/free
 * provenance plus small rings of recent refcount call stacks.
 */
struct rtentry_dbg {
	struct rtentry rtd_entry;	/* the real entry; must stay first so */
					/* rtentry* / rtentry_dbg* casts overlay */
	struct rtentry rtd_entry_saved;	/* snapshot taken by rte_free_debug() */
	u_int32_t rtd_inuse;		/* RTD_INUSE while live, RTD_FREED after */
	u_int16_t rtd_refhold_cnt;	/* total rtref() calls on this entry */
	u_int16_t rtd_refrele_cnt;	/* total rtunref() calls on this entry */
	struct thread *rtd_alloc_thread;	/* allocator (RTD_TRACE only) */
	void *rtd_alloc_stk_pc[RTD_TRSTACK_SIZE];	/* allocation backtrace */
	struct thread *rtd_free_thread;		/* freer (RTD_TRACE only) */
	void *rtd_free_stk_pc[RTD_TRSTACK_SIZE];	/* free backtrace */
	u_int16_t rtd_refhold_next;	/* next slot in rtd_refhold ring */
	u_int16_t rtd_refrele_next;	/* next slot in rtd_refrele ring */
	/* ring of the last RTD_REFHIST_SIZE rtref() call sites */
	struct {
		struct thread *th;
		void *pc[RTD_TRSTACK_SIZE];
	} rtd_refhold[RTD_REFHIST_SIZE];
	/* ring of the last RTD_REFHIST_SIZE rtunref() call sites */
	struct {
		struct thread *th;
		void *pc[RTD_TRSTACK_SIZE];
	} rtd_refrele[RTD_REFHIST_SIZE];
	TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;	/* on rttrash_head while trashed */
};
static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
static inline struct rtentry *rte_alloc_debug(void);
static inline void rte_free_debug(struct rtentry *);
static void rt_maskedcopy(struct sockaddr *,
struct sockaddr *, struct sockaddr *);
static void rtable_init(void **);
static inline void rtref_audit(struct rtentry_dbg *);
static inline void rtunref_audit(struct rtentry_dbg *);
static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, u_long,
unsigned int);
static int rtrequest_common_locked(int, struct sockaddr *,
struct sockaddr *, struct sockaddr *, int, struct rtentry **,
unsigned int);
static void rtalloc_ign_common_locked(struct route *, u_long, unsigned int);
static inline void sa_set_ifscope(struct sockaddr *, unsigned int);
static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *,
unsigned int);
static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *,
unsigned int);
static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
unsigned int);
static struct radix_node *node_lookup_default(void);
static int rn_match_ifscope(struct radix_node *, void *);
static struct ifaddr *ifa_ifwithroute_common_locked(int,
const struct sockaddr *, const struct sockaddr *, unsigned int);
__private_extern__ u_long route_generation = 0;
extern int use_routegenid;
/*
 * Layout-compatible view of struct sockaddr_in that reuses the (normally
 * zero-filled) sin_zero area to carry an interface scope (if_index).  Used
 * by the scoped-routing code to smuggle the scope through the radix tree
 * inside an otherwise ordinary AF_INET sockaddr.
 */
struct sockaddr_inifscope {
	__uint8_t sin_len;
	sa_family_t sin_family;
	in_port_t sin_port;
	struct in_addr sin_addr;
	union {
		/* plain sockaddr_in padding */
		char sin_zero[8];
		struct {
			__uint32_t ifscope;	/* embedded interface index */
		} _in_index;
	} un;
#define sin_ifscope un._in_index.ifscope
};
#define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
#define SINIFSCOPE(sa) ((struct sockaddr_inifscope *)(size_t)(sa))
/*
 * Sanity-check that sa is a full-sized AF_INET sockaddr before the scoped
 * accessors poke at its sin_zero area; panics otherwise.
 *
 * Fix: wrap the multi-statement body in do { } while (0) instead of a bare
 * brace block, so "if (x) ASSERT_SINIFSCOPE(sa); else ..." parses correctly
 * (the old form left a stray ';' that detached the else).
 */
#define ASSERT_SINIFSCOPE(sa) do { \
	if ((sa)->sa_family != AF_INET || \
	    (sa)->sa_len < sizeof (struct sockaddr_in)) \
		panic("%s: bad sockaddr_in %p\n", __func__, sa); \
} while (0)
struct matchleaf_arg {
unsigned int ifscope;
};
static struct sockaddr sin_def = {
sizeof (struct sockaddr_in), AF_INET, { 0, }
};
static unsigned int primary_ifscope = IFSCOPE_NONE;
#define INET_DEFAULT(dst) \
((dst)->sa_family == AF_INET && SIN(dst)->sin_addr.s_addr == 0)
#define RT(r) ((struct rtentry *)r)
#define RT_HOST(r) (RT(r)->rt_flags & RTF_HOST)
/*
 * Return TRUE if this is an unscoped AF_INET default route: destination
 * 0.0.0.0 and the entry not marked RTF_IFSCOPE.  Used when maintaining
 * the primary_ifscope hint on route add/delete.
 */
boolean_t
rt_inet_default(struct rtentry *rt, struct sockaddr *dst)
{
	return (INET_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
}
/*
 * Record the interface index of the primary (unscoped) default route;
 * IFSCOPE_NONE clears it.
 */
void
set_primary_ifscope(unsigned int ifscope)
{
	primary_ifscope = ifscope;
}
/*
 * Return the interface index of the primary (unscoped) default route,
 * or IFSCOPE_NONE if none has been recorded.
 */
unsigned int
get_primary_ifscope(void)
{
	return (primary_ifscope);
}
/*
 * Embed an interface scope into an AF_INET sockaddr, storing it in the
 * space of sin_zero (see struct sockaddr_inifscope).  Panics if sa is
 * not a full-sized AF_INET sockaddr.
 */
static inline void
sa_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
{
	ASSERT_SINIFSCOPE(sa);
	SINIFSCOPE(sa)->sin_ifscope = ifscope;
}
/*
 * Extract the interface scope embedded in an AF_INET sockaddr's sin_zero
 * area.  Panics if sa is not a full-sized AF_INET sockaddr.
 */
unsigned int
sa_get_ifscope(struct sockaddr *sa)
{
	ASSERT_SINIFSCOPE(sa);
	return (SINIFSCOPE(sa)->sin_ifscope);
}
/*
 * Copy *src into the caller-supplied *dst and tag the copy with ifscope;
 * returns dst as a generic sockaddr pointer for convenience.
 */
static struct sockaddr *
sin_copy(struct sockaddr_in *src, struct sockaddr_in *dst, unsigned int ifscope)
{
	*dst = *src;
	sa_set_ifscope(SA(dst), ifscope);
	return (SA(dst));
}
/*
 * Build a scoped copy of netmask src in the caller-supplied *dst: the
 * mask is applied to itself via rt_maskedcopy(), the scope is stored in
 * the sin_zero area, and sin_len is extended to cover the scope field so
 * the radix code treats the embedded scope as part of the mask.
 */
static struct sockaddr *
mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope)
{
	bzero(dst, sizeof (*dst));
	rt_maskedcopy(src, SA(dst), src);
	SINIFSCOPE(dst)->sin_ifscope = ifscope;
	SINIFSCOPE(dst)->sin_len =
	    offsetof(struct sockaddr_inifscope, sin_ifscope) +
	    sizeof (SINIFSCOPE(dst)->sin_ifscope);
	return (SA(dst));
}
/*
 * Radix-tree leaf-match callback used by scoped lookups: accept only
 * AF_INET routes marked RTF_IFSCOPE whose embedded interface scope equals
 * the one requested via struct matchleaf_arg.
 */
static int
rn_match_ifscope(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct matchleaf_arg *ma = arg;

	if (!(rt->rt_flags & RTF_IFSCOPE))
		return (0);
	if (rt_key(rt)->sa_family != AF_INET)
		return (0);

	return (SINIFSCOPE(rt_key(rt))->sin_ifscope == ma->ifscope ? 1 : 0);
}
/*
 * Walk the list of registered protocol domains and let each one attach
 * its routing table (dom_rtattach) at its address-family slot in the
 * supplied table (rt_tables).
 */
static void
rtable_init(void **table)
{
	struct domain *dom;
	for (dom = domains; dom; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&table[dom->dom_family],
			    dom->dom_rtoffset);
}
/*
 * One-time initialization of the routing subsystem: read the rte_debug
 * boot-arg, set up the global route mutex, initialize the radix code and
 * per-domain tables, and create the rtentry zone (sized for the debug
 * wrapper when RTD_DEBUG is on).
 */
void
route_init(void)
{
	int size;

	/* Any non-zero rte_debug boot-arg implies at least RTD_DEBUG. */
	PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
	if (rte_debug != 0)
		rte_debug |= RTD_DEBUG;

	rt_mtx_grp_attr = lck_grp_attr_alloc_init();
	rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr);
	rt_mtx_attr = lck_attr_alloc_init();
	if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) {
		printf("route_init: can't alloc rt_mtx\n");
		return;
	}

	/* rn_init() and table attach run under the new lock. */
	lck_mtx_lock(rt_mtx);
	rn_init();
	lck_mtx_unlock(rt_mtx);
	rtable_init((void **)rt_tables);
	route_domain_mtx = routedomain.dom_mtx;

	/* Zone element size depends on whether debug auditing is enabled. */
	if (rte_debug & RTD_DEBUG)
		size = sizeof (struct rtentry_dbg);
	else
		size = sizeof (struct rtentry);

	rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
	if (rte_zone == NULL)
		panic("route_init: failed allocating rte_zone");
	zone_change(rte_zone, Z_EXPAND, TRUE);
	TAILQ_INIT(&rttrash_head);
}
/*
 * Resolve ro->ro_dst and cache the resulting (referenced) route in
 * ro->ro_rt, ignoring no flags.  Takes rt_mtx internally.
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign(ro, 0UL);
}
/*
 * Unscoped route resolution for *ro, ignoring the RTF_* flags in `ignore`
 * when deciding whether to clone.  Caller must hold rt_mtx.
 *
 * Fix: the old body was "return (rtalloc_ign_common_locked(...));" — a
 * return statement with a (void) expression inside a void function, which
 * is a C constraint violation (C99 6.8.6.4) and only compiled as a GNU
 * extension.  Call the helper plainly instead.
 */
void
rtalloc_ign_locked(struct route *ro, u_long ignore)
{
	rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE);
}
/*
 * Scoped variant of rtalloc_ign_locked(): resolve *ro restricted to the
 * given interface scope.  Caller must hold rt_mtx.
 *
 * Fix: same as rtalloc_ign_locked() — do not "return" the value of a void
 * function (C99 6.8.6.4 constraint violation); call it plainly.
 */
void
rtalloc_scoped_ign_locked(struct route *ro, u_long ignore, unsigned int ifscope)
{
	rtalloc_ign_common_locked(ro, ignore, ifscope);
}
/*
 * Common back end for rtalloc_ign_locked() and its scoped variant.  If
 * the route already cached in *ro is still usable (has an interface and
 * RTF_UP set) it is kept; otherwise it is released and a fresh scoped
 * lookup is performed, stamping the result with the current
 * route_generation.  Caller holds rt_mtx.
 */
static void
rtalloc_ign_common_locked(struct route *ro, u_long ignore,
    unsigned int ifscope)
{
	struct rtentry *rt;

	if ((rt = ro->ro_rt) != NULL) {
		/* Cached route still up and attached: nothing to do. */
		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
			return;
		rtfree_locked(rt);
		ro->ro_rt = NULL;
	}
	ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
	if (ro->ro_rt)
		ro->ro_rt->generation_id = route_generation;
}
/*
 * Unlocked wrapper for rtalloc_ign_locked(): acquires rt_mtx around the
 * lookup.  Caller must NOT hold rt_mtx.
 */
void
rtalloc_ign(struct route *ro, u_long ignore)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	rtalloc_ign_locked(ro, ignore);
	lck_mtx_unlock(rt_mtx);
}
/*
 * Unscoped route lookup: returns a referenced rtentry for dst or NULL.
 * Caller holds rt_mtx.  See rtalloc1_common_locked() for the parameters.
 */
struct rtentry *
rtalloc1_locked(struct sockaddr *dst, int report, u_long ignflags)
{
	return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
}
/*
 * Scoped route lookup: like rtalloc1_locked() but restricted to the given
 * interface scope.  Caller holds rt_mtx.
 */
struct rtentry *
rtalloc1_scoped_locked(struct sockaddr *dst, int report, u_long ignflags,
    unsigned int ifscope)
{
	return (rtalloc1_common_locked(dst, report, ignflags, ifscope));
}
/*
 * Core route lookup.  Returns a referenced rtentry for dst, or NULL.
 *
 * dst      -- destination to look up
 * report   -- non-zero: emit an RTM_MISS/RTM_RESOLVE routing-socket
 *             message when the lookup fails or needs external resolution
 * ignflags -- RTF_* flags to ignore when deciding whether to clone
 * ifscope  -- interface scope for scoped lookups (IFSCOPE_NONE for none)
 *
 * Caller holds rt_mtx.
 */
static struct rtentry *
rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags,
    unsigned int ifscope)
{
	struct radix_node_head *rnh = rt_tables[dst->sa_family];
	struct rtentry *rt, *newrt = NULL;
	struct rt_addrinfo info;
	u_long nflags;
	int err = 0, msgtype = RTM_MISS;

	if (rnh == NULL)
		goto unreachable;
	/* rt_lookup() returns the entry with a reference held. */
	rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
	if (rt == NULL)
		goto unreachable;
	newrt = rt;
	nflags = rt->rt_flags & ~ignflags;
	if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
		/*
		 * The route is marked for cloning and the caller did not
		 * ask to ignore that: clone a new entry for this dst.
		 */
		err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
		if (err) {
			/* Clone failed: fall back to the parent route. */
			newrt = rt;
			goto miss;
		}
		/* Drop the reference on the parent; keep the clone. */
		rtfree_locked(rt);
		if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
			/* External resolver must be notified. */
			msgtype = RTM_RESOLVE;
			goto miss;
		}
	}
	goto done;
unreachable:
	rtstat.rts_unreach++;
miss:
	if (report) {
		bzero((caddr_t)&info, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(msgtype, &info, 0, err);
	}
done:
	return (newrt);
}
/*
 * Unlocked wrapper for rtalloc1_locked(): takes rt_mtx around the lookup
 * and returns the referenced route (or NULL).
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
{
	struct rtentry *rt;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	rt = rtalloc1_locked(dst, report, ignflags);
	lck_mtx_unlock(rt_mtx);

	return (rt);
}
/*
 * Drop one reference on rt; when the last reference goes away and the
 * entry is no longer in the tree (RTF_UP cleared by RTM_DELETE), tear it
 * down: release the parent and ifa references, free the key storage, and
 * return the entry to its zone.  Caller holds rt_mtx.
 */
void
rtfree_locked(struct rtentry *rt)
{
	struct radix_node_head *rnh;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
	if (rt == 0) {
		printf("rtfree - rt is NULL\n");
		return;
	}
	rnh = rt_tables[rt_key(rt)->sa_family];
	rtunref(rt);
	if (rt->rt_refcnt > 0)
		return;
	/* Last reference: let the family hook (e.g. ARP) clean up. */
	if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
		rnh->rnh_close((struct radix_node *)rt, rnh);
	if (!(rt->rt_flags & RTF_UP)) {
		/* Entry must already be detached from the radix tree. */
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtfree 2");
		/* It leaves the "trash" (deleted-but-referenced) set. */
		(void) OSDecrementAtomic((SInt32 *)&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
			    rtd_trash_link);
		}
#ifdef DIAGNOSTIC
		if (rt->rt_refcnt < 0) {
			printf("rtfree: %p not freed (neg refs) cnt=%d\n",
			    rt, rt->rt_refcnt);
			return;
		}
#endif
		/* Release the clone's reference on its parent, if any. */
		if (rt->rt_parent)
			rtfree_locked(rt->rt_parent);
		if (rt->rt_ifa) {
			ifafree(rt->rt_ifa);
			rt->rt_ifa = NULL;
		}
		/* rt_key storage was allocated by rt_setgate(). */
		R_Free(rt_key(rt));
		rte_free(rt);
	}
}
/*
 * Unlocked wrapper for rtfree_locked(): acquires rt_mtx around the
 * release.  Caller must NOT hold rt_mtx.
 */
void
rtfree(struct rtentry *rt)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	rtfree_locked(rt);
	lck_mtx_unlock(rt_mtx);
}
/*
 * Decrement rt_refcnt without freeing the entry (unlike rtfree_locked).
 * Panics on a non-positive count; records the call site when debug
 * auditing is enabled.  Caller holds rt_mtx.
 */
void
rtunref(struct rtentry *p)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (p->rt_refcnt <= 0)
		panic("rtunref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);

	if (rte_debug & RTD_DEBUG)
		rtunref_audit((struct rtentry_dbg *)p);

	p->rt_refcnt--;
}
/*
 * Debug bookkeeping for rtunref(): verify the entry is still live, bump
 * the release counter, and (with RTD_TRACE) record the caller's thread
 * and backtrace in the rtd_refrele ring buffer.
 */
static inline void
rtunref_audit(struct rtentry_dbg *rte)
{
	if (rte->rtd_inuse != RTD_INUSE)
		panic("rtunref: on freed rte=%p\n", rte);

	rte->rtd_refrele_cnt++;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_refrele[rte->rtd_refrele_next].th = current_thread();
		bzero(rte->rtd_refrele[rte->rtd_refrele_next].pc,
		    sizeof (rte->rtd_refrele[rte->rtd_refrele_next].pc));
		(void) OSBacktrace(rte->rtd_refrele[rte->rtd_refrele_next].pc,
		    RTD_TRSTACK_SIZE);
		rte->rtd_refrele_next =
		    (rte->rtd_refrele_next + 1) % RTD_REFHIST_SIZE;
	}
}
/*
 * Take an additional reference on a route entry.  Panics on a negative
 * count; records the call site when debug auditing is enabled.  Caller
 * holds rt_mtx.
 */
void
rtref(struct rtentry *p)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (p->rt_refcnt < 0)
		panic("rtref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);

	if (rte_debug & RTD_DEBUG)
		rtref_audit((struct rtentry_dbg *)p);

	p->rt_refcnt++;
}
/*
 * Debug bookkeeping for rtref(): verify the entry is still live, bump
 * the hold counter, and (with RTD_TRACE) record the caller's thread and
 * backtrace in the rtd_refhold ring buffer.
 */
static inline void
rtref_audit(struct rtentry_dbg *rte)
{
	if (rte->rtd_inuse != RTD_INUSE)
		panic("rtref_audit: on freed rte=%p\n", rte);

	rte->rtd_refhold_cnt++;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_refhold[rte->rtd_refhold_next].th = current_thread();
		bzero(rte->rtd_refhold[rte->rtd_refhold_next].pc,
		    sizeof (rte->rtd_refhold[rte->rtd_refhold_next].pc));
		(void) OSBacktrace(rte->rtd_refhold[rte->rtd_refhold_next].pc,
		    RTD_TRSTACK_SIZE);
		rte->rtd_refhold_next =
		    (rte->rtd_refhold_next + 1) % RTD_REFHIST_SIZE;
	}
}
/*
 * Point rt at a (possibly NULL) interface address, managing the ifa
 * reference counts: the old address is released and a reference is taken
 * on the new one.  No-op when the address is unchanged.
 */
void
rtsetifa(struct rtentry *rt, struct ifaddr* ifa)
{
	if (rt == NULL)
		panic("rtsetifa");

	if (rt->rt_ifa == ifa)
		return;

	/* Release the old address first, then install and hold the new. */
	if (rt->rt_ifa != NULL)
		ifafree(rt->rt_ifa);
	rt->rt_ifa = ifa;
	if (ifa != NULL)
		ifaref(ifa);
}
/*
 * Drop one reference on an interface address and free it when the last
 * reference goes away.  OSAddAtomic returns the value prior to the
 * decrement, so the FREE happens when the count was 0 on entry.
 * NOTE(review): this implies the live count is 0-based (0 == one
 * outstanding reference) -- confirm against ifaref() and the ifa
 * allocation sites.  Freeing an address still attached to an ifp
 * (IFA_ATTACHED) is a bug and panics.
 */
void
ifafree(struct ifaddr *ifa)
{
	int oldval;

	if (ifa == NULL)
		panic("ifafree");

	oldval = OSAddAtomic(-1, (SInt32 *)&ifa->ifa_refcnt);
	if (oldval == 0) {
		if ((ifa->ifa_debug & IFA_ATTACHED) != 0) {
			panic("ifa attached to ifp is being freed\n");
		}
		FREE(ifa, M_IFADDR);
	}
}
/*
 * Take a reference on an interface address.  Panics on NULL or if the
 * 32-bit reference count is about to wrap around.
 */
void
ifaref(struct ifaddr *ifa)
{
	if (ifa == NULL)
		panic("ifaref");

	if (OSAddAtomic(1, (SInt32 *)&ifa->ifa_refcnt) == 0xffffffff)
		panic("ifaref - reference count rolled over!");
}
/*
 * Process an ICMP redirect: host `src` says `dst` is better reached via
 * `gateway`.  After sanity checks, either create a new dynamic host route
 * or rewrite the gateway of the existing route in place.  An RTM_REDIRECT
 * routing-socket message is emitted regardless of outcome.
 *
 * ifp -- interface the redirect arrived on (scopes the route lookups)
 * rtp -- if non-NULL and the redirect succeeded, returns the affected
 *        route with the lookup reference transferred to the caller
 */
void
rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt = NULL;
	int error = 0;
	short *stat = 0;
	struct rt_addrinfo info;
	struct ifaddr *ifa = NULL;
	unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
	struct sockaddr_in sin;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);

	/* The new gateway must be directly reachable on this interface. */
	if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}

	rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);

	/* With scoped routing, compare against a scoped copy of src. */
	if (ip_doscopedroute && src->sa_family == AF_INET)
		src = sin_copy(SIN(src), &sin, ifscope);

	if (!(flags & RTF_DONE) && rt &&
	    (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
	    ifa->ifa_addr))) {
		/* Redirect did not come from the current router for dst. */
		error = EINVAL;
	} else {
		ifafree(ifa);
		/* Refuse a redirect pointing at one of our own addresses. */
		if ((ifa = ifa_ifwithaddr(gateway))) {
			ifafree(ifa);
			ifa = NULL;
			error = EHOSTUNREACH;
		}
	}

	if (ifa) {
		ifafree(ifa);
		ifa = NULL;
	}

	if (error)
		goto done;

	/* No usable route (or degenerate mask): create a fresh host route. */
	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;

	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Existing route is a network route but the
			 * redirect is for a host: add a dynamic host route
			 * rather than modifying the network route.
			 */
create:
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			error = rtrequest_scoped_locked(RTM_ADD, dst,
			    gateway, netmask, flags, NULL, ifscope);
			stat = &rtstat.rts_dynamic;
		} else {
			/* Modify the existing route's gateway in place. */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			error = rt_setgate(rt, rt_key(rt), gateway);
		}
	} else {
		/* Can't redirect a directly-connected (non-gateway) route. */
		error = EHOSTUNREACH;
	}
done:
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree_locked(rt);
	}
out:
	if (error) {
		rtstat.rts_badredirect++;
	} else {
		if (stat != NULL)
			(*stat)++;
		/* Routing table changed: invalidate cached routes. */
		if (use_routegenid)
			route_generation++;
	}
	/* Tell routing-socket listeners about the redirect, win or lose. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	lck_mtx_unlock(rt_mtx);
}
/*
 * Routing ioctl entry point.  Only multicast-routing ioctls are handled
 * (and only when built with INET && MROUTING); anything else gets ENXIO.
 */
int
rtioctl(int req, caddr_t data, struct proc *p)
{
#pragma unused(p)
#if INET && MROUTING
	return mrt_ioctl(req, data);
#else
	return ENXIO;
#endif
}
/*
 * Unlocked wrapper for ifa_ifwithroute_locked(): find the interface
 * address to associate with a route to dst via gateway, taking rt_mtx
 * around the search.  The returned ifa (if any) is referenced.
 */
struct ifaddr *
ifa_ifwithroute(
	int flags,
	const struct sockaddr *dst,
	const struct sockaddr *gateway)
{
	struct ifaddr *result;

	lck_mtx_lock(rt_mtx);
	result = ifa_ifwithroute_locked(flags, dst, gateway);
	lck_mtx_unlock(rt_mtx);

	return (result);
}
/*
 * Unscoped lookup of the interface address for a route: strips
 * RTF_IFSCOPE from flags and searches without an interface restriction.
 * Caller holds rt_mtx.
 */
struct ifaddr *
ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway)
{
	return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
	    gateway, IFSCOPE_NONE));
}
/*
 * Scoped lookup of the interface address for a route: RTF_IFSCOPE is
 * forced on or off in flags to match whether a real scope was supplied,
 * then the common search runs restricted to that scope.  Caller holds
 * rt_mtx.
 */
struct ifaddr *
ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int ifscope)
{
	if (ifscope == IFSCOPE_NONE)
		flags &= ~RTF_IFSCOPE;
	else
		flags |= RTF_IFSCOPE;

	return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
}
/*
 * Find the interface address to attach to a route toward dst via gateway,
 * trying progressively weaker matches: destination/gateway address match,
 * gateway network match, then a route lookup on dst and finally on
 * gateway.  Returns a referenced ifa or NULL.  When RTF_IFSCOPE is set
 * the result must belong to the interface named by ifscope.  Caller
 * holds rt_mtx.
 */
static struct ifaddr *
ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct rtentry *rt = NULL;
	struct sockaddr_in dst_in, gw_in;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * With scoped routing, work on unscoped copies so embedded scopes
	 * in the callers' sockaddrs don't perturb the comparisons below.
	 */
	if (ip_doscopedroute) {
		if (dst != NULL && dst->sa_family == AF_INET)
			dst = sin_copy(SIN(dst), &dst_in, IFSCOPE_NONE);
		if (gateway != NULL && gateway->sa_family == AF_INET)
			gateway = sin_copy(SIN(gateway), &gw_in, IFSCOPE_NONE);
	}

	if (!(flags & RTF_GATEWAY)) {
		/*
		 * Direct route: for a host route try the destination as a
		 * point-to-point peer first, then treat "gateway" as one
		 * of our own addresses.
		 */
		if (flags & RTF_HOST) {
			ifa = ifa_ifwithdstaddr(dst);
		}
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_scoped(gateway, ifscope);
	} else {
		/* Gateway route: is the gateway a point-to-point peer? */
		ifa = ifa_ifwithdstaddr(gateway);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_scoped(gateway, ifscope);
	if (ifa == NULL) {
		/* Last resort: route to dst and borrow its ifa. */
		rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
		    0, 0UL, ifscope);
		if (rt != NULL) {
			ifa = rt->rt_ifa;
			if (ifa != NULL)
				ifaref(ifa);
			rtunref(rt);
			rt = NULL;
		}
	}
	/* Prefer an address of dst's family on the chosen interface. */
	if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *newifa;
		newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (newifa != NULL) {
			ifafree(ifa);
			ifa = newifa;
		}
	}
	/*
	 * If the found address isn't the gateway itself, a route to the
	 * gateway (if one exists) names the more appropriate interface.
	 */
	if ((ifa == NULL ||
	    !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) &&
	    (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gateway,
	    0, 0UL, ifscope)) != NULL) {
		if (ifa != NULL)
			ifafree(ifa);
		ifa = rt->rt_ifa;
		if (ifa != NULL)
			ifaref(ifa);
		rtunref(rt);
	}
	/* A scoped request must resolve to the requested interface. */
	if ((flags & RTF_IFSCOPE) &&
	    ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
		ifafree(ifa);
		ifa = NULL;
	}

	return (ifa);
}
/*
 * Round a sockaddr length up to the next multiple of sizeof(long); zero
 * rounds up to sizeof(long).
 *
 * Fix: parenthesize the first use of the argument -- the old "a>0"
 * misparsed arguments built from operators of lower precedence than ">"
 * (e.g. ROUNDUP(x ? 4 : 8) silently evaluated to x's first arm).
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
static int rt_fixdelete __P((struct radix_node *, void *));
static int rt_fixchange __P((struct radix_node *, void *));
struct rtfc_arg {
struct rtentry *rt0;
struct radix_node_head *rnh;
};
/*
 * Unscoped routing-table request (add/delete/resolve): strips
 * RTF_IFSCOPE and calls the common engine with no interface scope.
 * Caller holds rt_mtx.
 */
int
rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
	return (rtrequest_common_locked(req, dst, gateway, netmask,
	    (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
}
/*
 * Scoped routing-table request: RTF_IFSCOPE is forced on or off in flags
 * to match whether a real interface scope was supplied, then the common
 * engine runs with that scope.  Caller holds rt_mtx.
 */
int
rtrequest_scoped_locked(int req, struct sockaddr *dst,
    struct sockaddr *gateway, struct sockaddr *netmask, int flags,
    struct rtentry **ret_nrt, unsigned int ifscope)
{
	if (ifscope == IFSCOPE_NONE)
		flags &= ~RTF_IFSCOPE;
	else
		flags |= RTF_IFSCOPE;

	return (rtrequest_common_locked(req, dst, gateway, netmask,
	    flags, ret_nrt, ifscope));
}
/*
 * Core of the routing-table manipulation engine.
 *
 * req     -- RTM_ADD, RTM_DELETE, or RTM_RESOLVE (clone on demand)
 * dst0    -- destination as given by the caller (never rewritten; a
 *            scoped copy in `sin` may replace the working `dst`)
 * gateway -- gateway for RTM_ADD (derived from the parent for RESOLVE)
 * netmask -- netmask, or NULL for host routes
 * flags   -- RTF_* flags; RTF_IFSCOPE requests a scoped entry
 * ret_nrt -- in: parent route for RTM_RESOLVE; out: resulting route,
 *            referenced, for ADD/RESOLVE (and DELETE, trash-referenced)
 * ifscope -- interface index for scoped requests
 *
 * Returns 0 or an errno.  Caller holds rt_mtx.
 */
static int
rtrequest_common_locked(int req, struct sockaddr *dst0,
    struct sockaddr *gateway, struct sockaddr *netmask, int flags,
    struct rtentry **ret_nrt, unsigned int ifscope)
{
	int error = 0;
	struct rtentry *rt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa = NULL;
	struct sockaddr *ndst, *dst = dst0;
	struct sockaddr_in sin, mask;
#define senderr(x) { error = x ; goto bad; }

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
	/* Select the routing tree for this address family. */
	if ((rnh = rt_tables[dst->sa_family]) == 0)
		senderr(ESRCH);
	/* Host routes carry no netmask in the tree. */
	if (flags & RTF_HOST)
		netmask = 0;
	/*
	 * Scoped request (except RESOLVE, which inherits from its parent):
	 * only AF_INET is supported, and only when scoped routing is on
	 * for adds.  Rewrite dst/netmask to scoped copies so the scope
	 * participates in the radix key.
	 */
	if (req != RTM_RESOLVE && (flags & RTF_IFSCOPE)) {
		if (dst->sa_family != AF_INET ||
		    (req == RTM_ADD && !ip_doscopedroute))
			senderr(EINVAL);
		if (ifscope == IFSCOPE_NONE) {
			flags &= ~RTF_IFSCOPE;
		} else {
			dst = sin_copy(SIN(dst), &sin, ifscope);
			if (netmask != NULL)
				netmask = mask_copy(netmask, &mask, ifscope);
		}
	}
	switch (req) {
	case RTM_DELETE:
		/* Detach the node from the tree. */
		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
			senderr(ESRCH);
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");
		rt = (struct rtentry *)rn;
		/* Hold it while it sits on the trash list. */
		rtref(rt);
		rt->rt_flags &= ~RTF_UP;
		/* Take down any routes cloned from this one. */
		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
		    rt_mask(rt)) {
			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
			    rt_fixdelete, rt);
		}
		/* Drop the cached gateway route, if any. */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute;
			rtfree_locked(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = 0;
		}
		/* Let the interface address hook clean up (e.g. ARP). */
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
		ifa = NULL;
		/* Deleted-but-referenced entries are counted as trash. */
		(void) OSIncrementAtomic((SInt32 *)&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_INSERT_TAIL(&rttrash_head,
			    (struct rtentry_dbg *)rt, rtd_trash_link);
		}
		/* Deleting the primary default route clears the hint. */
		if (rt_inet_default(rt, rt_key(rt)))
			set_primary_ifscope(IFSCOPE_NONE);
		if (ret_nrt != NULL) {
			/* Caller takes over our reference. */
			*ret_nrt = rt;
		} else {
			rtfree_locked(rt);
		}
		break;
	case RTM_RESOLVE:
		/* Clone a host route from the parent in *ret_nrt. */
		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
			senderr(EINVAL);
		ifa = rt->rt_ifa;
		ifaref(ifa);
		flags = rt->rt_flags &
		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
		flags |= RTF_WASCLONED;
		gateway = rt->rt_gateway;
		if ((netmask = rt->rt_genmask) == 0)
			flags |= RTF_HOST;
		if (!ip_doscopedroute || dst->sa_family != AF_INET)
			goto makeroute;
		/*
		 * Scoped routing: clones of non-link-local destinations
		 * inherit the parent's scope (or the parent interface's
		 * index); link-local clones stay unscoped.
		 */
		if (!IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
			if (flags & RTF_IFSCOPE) {
				ifscope = sa_get_ifscope(rt_key(rt));
			} else {
				ifscope = rt->rt_ifp->if_index;
				flags |= RTF_IFSCOPE;
			}
		} else {
			ifscope = IFSCOPE_NONE;
			flags &= ~RTF_IFSCOPE;
		}
		dst = sin_copy(SIN(dst), &sin, ifscope);
		if (netmask != NULL)
			netmask = mask_copy(netmask, &mask, ifscope);
		goto makeroute;
	case RTM_ADD:
		if ((flags & RTF_GATEWAY) && !gateway)
			panic("rtrequest: RTF_GATEWAY but no gateway");
		/* Find the outgoing interface address for this route. */
		if (flags & RTF_IFSCOPE) {
			ifa = ifa_ifwithroute_scoped_locked(flags, dst0,
			    gateway, ifscope);
		} else {
			ifa = ifa_ifwithroute_locked(flags, dst0, gateway);
		}
		if (ifa == NULL)
			senderr(ENETUNREACH);
makeroute:
		if ((rt = rte_alloc()) == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		/* rt_setgate also allocates the rt_key/rt_gateway buffer. */
		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
			rte_free(rt);
			senderr(error);
		}
		ndst = rt_key(rt);
		/* Install the (masked) destination as the tree key. */
		if (netmask)
			rt_maskedcopy(dst, ndst, netmask);
		else
			Bcopy(dst, ndst, dst->sa_len);
		rtsetifa(rt, ifa);
		rt->rt_ifp = rt->rt_ifa->ifa_ifp;
		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
		    rnh, rt->rt_nodes);
		if (rn == 0) {
			/*
			 * Insertion collided with an existing entry; if it
			 * is a stale clone, delete it and retry once.
			 */
			struct rtentry *rt2;
			if (flags & RTF_IFSCOPE) {
				rt2 = rtalloc1_scoped_locked(dst0, 0,
				    RTF_CLONING | RTF_PRCLONING, ifscope);
			} else {
				rt2 = rtalloc1_locked(dst, 0,
				    RTF_CLONING | RTF_PRCLONING);
			}
			if (rt2 && rt2->rt_parent) {
				rtrequest_locked(RTM_DELETE,
				    (struct sockaddr *)rt_key(rt2),
				    rt2->rt_gateway,
				    rt_mask(rt2), rt2->rt_flags, 0);
				rtfree_locked(rt2);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)netmask,
				    rnh, rt->rt_nodes);
			} else if (rt2) {
				rtfree_locked(rt2);
			}
		}
		if (rn == 0) {
			/* Still colliding: unwind everything we built. */
			if (rt->rt_gwroute)
				rtfree_locked(rt->rt_gwroute);
			if (rt->rt_ifa) {
				ifafree(rt->rt_ifa);
			}
			R_Free(rt_key(rt));
			rte_free(rt);
			senderr(EEXIST);
		}
		rt->rt_parent = 0;
		if (req == RTM_RESOLVE) {
			/* Clones inherit metrics and hold their parent. */
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
				rt->rt_parent = (*ret_nrt);
				rtref(*ret_nrt);
			}
		}
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
		ifafree(ifa);
		ifa = 0;
		/*
		 * A new network route may shadow cloned routes built from
		 * a coarser parent; prune those so they re-clone.
		 */
		if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
			struct rtfc_arg arg;
			arg.rnh = rnh;
			arg.rt0 = rt;
			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
			    rt_fixchange, &arg);
		}
		/* Installing an unscoped default route updates the hint. */
		if (rt_inet_default(rt, rt_key(rt)))
			set_primary_ifscope(rt->rt_ifp->if_index);
		if (ret_nrt) {
			*ret_nrt = rt;
			rtref(rt);
		}
		break;
	}
bad:
	if (ifa)
		ifafree(ifa);
	return (error);
}
/*
 * Unlocked wrapper for rtrequest_locked(): takes rt_mtx around the
 * routing-table request and returns its errno result.
 */
int
rtrequest(
	int req,
	struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct rtentry **ret_nrt)
{
	int err;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	err = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt);
	lck_mtx_unlock(rt_mtx);

	return (err);
}
/*
 * Tree-walk callback used when deleting a cloning route: remove every
 * non-pinned, non-cloning child that was cloned from the route being
 * deleted (passed in vp), so it can be re-cloned from a new parent.
 */
static int
rt_fixdelete(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtentry *parent = vp;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (rt->rt_parent != parent)
		return 0;
	if (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))
		return 0;

	return rtrequest_locked(RTM_DELETE, rt_key(rt),
	    (struct sockaddr *)0, rt_mask(rt),
	    rt->rt_flags, (struct rtentry **)0);
}
/*
 * Tree-walk callback used after inserting a new network route (ap->rt0):
 * delete cloned routes that now fall inside the new route's coverage, so
 * they get re-cloned from the more specific parent.  A clone is deleted
 * when (a) its parent's mask is no more specific than the new route's
 * mask, and (b) its key matches the new route's key under the new mask.
 */
static int
rt_fixchange(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtfc_arg *ap = vp;
	struct rtentry *rt0 = ap->rt0;
	struct radix_node_head *rnh = ap->rnh;
	u_char *xk1, *xm1, *xk2, *xmp;
	int i, len, mlen;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	/* Only clones are candidates; cloning/pinned routes are kept. */
	if (!rt->rt_parent ||
	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING)))
		return (0);

	/* Direct child of the new route: always re-clone. */
	if (rt->rt_parent == rt0)
		goto delete_rt;

	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
	xk1 = (u_char *)rt_key(rt0);
	xm1 = (u_char *)rt_mask(rt0);
	xk2 = (u_char *)rt_key(rt);
	xmp = (u_char *)rt_mask(rt->rt_parent);
	mlen = rt_key(rt->rt_parent)->sa_len;

	/* Parent's mask longer than the new key: can't be shadowed. */
	if (mlen > rt_key(rt0)->sa_len)
		return (0);

	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
		/* keep if any bit of the parent mask is absent from xm1 */
		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
			return (0);
	}

	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
		/* keep if the clone's key mismatches under the new mask */
		if ((xk2[i] & xm1[i]) != xk1[i])
			return (0);
	}

delete_rt:
	return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
	    rt_mask(rt), rt->rt_flags, NULL));
}
/*
 * Set (or change) the gateway of rt to gate, resolving and caching the
 * route to the gateway in rt_gwroute for RTF_GATEWAY routes.  The key
 * and gateway sockaddrs share a single R_Malloc'd buffer (key first,
 * gateway at offset dlen), grown here when the new gateway is larger.
 * Returns 0 or an errno.  Caller holds rt_mtx.
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct radix_node_head *rnh = rt_tables[dst->sa_family];

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	/*
	 * A host route whose gateway equals its own destination would
	 * resolve through itself; delete it instead of looping.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) &&
	    (bcmp(dst, gate, dst->sa_len) == 0)) {
		if (rt_key(rt))
			rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		return (EADDRNOTAVAIL);
	}

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;
		unsigned int ifscope;

		/* Resolve the gateway within the destination's scope. */
		ifscope = (dst->sa_family == AF_INET) ?
		    sa_get_ifscope(dst) : IFSCOPE_NONE;

		gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);

		/* The gateway route must not be the route itself. */
		if (gwrt == rt) {
			rtunref(gwrt);
			return (EADDRINUSE);
		}

		/* Scoped route: gateway must resolve on that interface. */
		if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
		    gwrt != NULL && gwrt->rt_ifp != NULL &&
		    gwrt->rt_ifp->if_index != ifscope) {
			rtfree_locked(gwrt);
			return ((rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH);
		}

		if (rt->rt_gwroute != NULL)
			rtfree_locked(rt->rt_gwroute);
		rt->rt_gwroute = gwrt;

		/* Unscoped default route: refresh the primary hint. */
		if (rt_inet_default(rt, dst) && rt->rt_ifp != NULL)
			set_primary_ifscope(rt->rt_ifp->if_index);

		/*
		 * Export the default gateway's link-layer address to the
		 * kernel debugger when it is (or may become) the primary.
		 */
		if ((dst->sa_family == AF_INET) &&
		    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
		    (gwrt->rt_ifp->if_index == get_primary_ifscope() ||
		    get_primary_ifscope() == IFSCOPE_NONE))
			kdp_set_gateway_mac(SDL(gwrt->rt_gateway)->sdl_data);
	}

	/*
	 * Grow the combined key+gateway buffer if the new gateway does
	 * not fit in the existing one.
	 */
	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		caddr_t new;

		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL) {
			if (rt->rt_gwroute != NULL)
				rtfree_locked(rt->rt_gwroute);
			rt->rt_gwroute = NULL;
			return (ENOBUFS);
		}

		Bcopy(dst, new, dlen);
		R_Free(rt_key(rt));
		rt->rt_nodes->rn_key = new;
		rt->rt_gateway = (struct sockaddr *)(new + dlen);
	}

	Bcopy(gate, rt->rt_gateway, glen);

	/* Propagate the gateway route's embedded scope to rt_gateway. */
	if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE) &&
	    rt->rt_gateway->sa_family == AF_INET &&
	    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
		sa_set_ifscope(rt->rt_gateway,
		    sa_get_ifscope(rt_key(rt->rt_gwroute)));
	}

	/* The new gateway may invalidate clones under this network. */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
		    rt_fixchange, &arg);
	}

	return (0);
}
/*
 * Copy src into dst, ANDing each byte against the netmask.  The first
 * two bytes (sa_len/sa_family) are copied verbatim; masking proceeds to
 * the shorter of the mask length (netmask's first byte) and src's
 * length, and any remainder of src's length is zero-filled.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	u_char *cp1 = (u_char *)src;
	u_char *cp2 = (u_char *)dst;
	u_char *cp3 = (u_char *)netmask;
	u_char *cplim = cp2 + *cp3;	/* end of mask coverage */
	u_char *cplim2 = cp2 + *cp1;	/* end of src length */

	*cp2++ = *cp1++; *cp2++ = *cp1++;	/* sa_len, sa_family as-is */
	cp3 += 2;
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
}
/*
 * AF_INET-only radix lookup with an explicit interface scope.  The
 * destination (and mask, if any) are rewritten as scoped copies; when a
 * real scope is given, rn_match_ifscope filters leaves to that scope,
 * otherwise a plain lookup runs.  Root nodes are treated as no match.
 */
static struct radix_node *
node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
    unsigned int ifscope)
{
	struct radix_node_head *rnh = rt_tables[AF_INET];
	struct radix_node *rn;
	struct sockaddr_in sin, mask;
	struct matchleaf_arg ma = { ifscope };
	rn_matchf_t *f = rn_match_ifscope;
	void *w = &ma;

	if (dst->sa_family != AF_INET)
		return (NULL);

	dst = sin_copy(SIN(dst), &sin, ifscope);
	if (netmask != NULL)
		netmask = mask_copy(netmask, &mask, ifscope);

	/* Unscoped lookup: no leaf filter needed. */
	if (ifscope == IFSCOPE_NONE)
		f = w = NULL;

	rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w);
	if (rn != NULL && (rn->rn_flags & RNF_ROOT))
		rn = NULL;

	return (rn);
}
/*
 * Look up the unscoped AF_INET default route (sin_def is 0.0.0.0).
 */
static struct radix_node *
node_lookup_default(void)
{
	struct radix_node_head *rnh = rt_tables[AF_INET];
	return (rnh->rnh_lookup(&sin_def, NULL, rnh));
}
/*
 * Unified route lookup, scope-aware for AF_INET when scoped routing is
 * enabled.  First tries an unscoped match; depending on the interface
 * of that result, may retry with an explicit scope and then pick the
 * more specific of the two candidates; finally falls back to the default
 * route if it belongs to the requested scope.  The returned entry is
 * referenced (root nodes excluded).  Caller holds rt_mtx.
 *
 * lookup_only -- exact (rnh_lookup) rather than longest-prefix match;
 *                also the only mode in which netmask is honored
 */
struct rtentry *
rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
    struct radix_node_head *rnh, unsigned int ifscope)
{
	struct radix_node *rn0, *rn;
	boolean_t dontcare = (ifscope == IFSCOPE_NONE);

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (!lookup_only)
		netmask = NULL;

	/* Non-INET (or unscoped kernels): plain radix semantics. */
	if (!ip_doscopedroute || dst->sa_family != AF_INET) {
		if (lookup_only)
			rn = rnh->rnh_lookup(dst, netmask, rnh);
		else
			rn = rnh->rnh_matchaddr(dst, rnh);
		goto done;
	}

	/* First pass: unscoped match. */
	rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE);

	/* No scope requested and no primary hint: take what we found. */
	if (dontcare && (ifscope = get_primary_ifscope()) == IFSCOPE_NONE)
		goto validate;

	if (rn != NULL) {
		struct rtentry *rt = RT(rn);
		/* Loopback results are accepted for any scope. */
		if (rt->rt_ifp != lo_ifp) {
			if (rt->rt_ifp->if_index != ifscope) {
				/*
				 * Wrong interface: retry scoped.  Without
				 * an explicit scope, adopt this route's
				 * interface as the scope and also keep it
				 * as the fallback candidate.
				 */
				rn = NULL;
				if (dontcare)
					ifscope = rt->rt_ifp->if_index;
				else
					rn0 = NULL;
			} else if (!(rt->rt_flags & RTF_IFSCOPE)) {
				/* Right interface but unscoped entry:
				 * prefer an explicitly scoped one. */
				rn = NULL;
			}
		}
	}

	/* Second pass: scoped match. */
	if (rn == NULL)
		rn = node_lookup(dst, netmask, ifscope);

	/*
	 * Between the two candidates, prefer the unscoped one when it is
	 * more specific (non-default vs default, host vs network).
	 */
	if (rn == NULL || (rn0 != NULL &&
	    ((INET_DEFAULT(rt_key(RT(rn))) && !INET_DEFAULT(rt_key(RT(rn0)))) ||
	    (!RT_HOST(rn) && RT_HOST(rn0)))))
		rn = rn0;

	/* Last resort: the default route, if it matches the scope. */
	if (rn == NULL && (rn = node_lookup_default()) != NULL &&
	    RT(rn)->rt_ifp->if_index != ifscope)
		rn = NULL;

validate:
	if (rn != NULL && !lookup_only)
		(void) in_validate(rn);

done:
	if (rn != NULL && (rn->rn_flags & RNF_ROOT))
		rn = NULL;
	else if (rn != NULL)
		rtref(RT(rn));

	return (RT(rn));
}
/*
 * Unlocked wrapper for rtinit_locked(): set up or tear down the route
 * for an interface address, taking rt_mtx around the work.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	int err;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	err = rtinit_locked(ifa, cmd, flags);
	lck_mtx_unlock(rt_mtx);

	return (err);
}
/*
 * Add or delete the route associated with an interface address (used on
 * address configuration changes).  cmd is RTM_ADD or RTM_DELETE; flags
 * (plus ifa->ifa_flags) become the route's flags, with RTF_HOST choosing
 * between ifa_dstaddr and a masked network destination.  Emits an
 * RTM_NEWADDR-style message and bumps route_generation on success.
 * Caller holds rt_mtx.
 */
int
rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst;
	struct sockaddr *deldst;
	struct mbuf *m = 0;
	struct rtentry *nrt = 0;
	int error;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		/*
		 * Network deletion: look up by the masked destination,
		 * built in a temporary mbuf.
		 */
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			m = m_get(M_DONTWAIT, MT_SONAME);
			if (m == NULL) {
				return(ENOBUFS);
			}
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
			dst = deldst;
		}
		/*
		 * Only delete the route if it actually belongs to this
		 * interface address.
		 */
		rt = rtalloc1_locked(dst, 0, 0UL);
		if (rt) {
			rtunref(rt);
			if (rt->rt_ifa != ifa) {
				if (m)
					(void) m_free(m);
				return (flags & RTF_HOST ? EHOSTUNREACH
				    : ENETUNREACH);
			}
		}
#if 0
		else {
			lck_mtx_unlock(rt_mtx);
			return (flags & RTF_HOST ? EHOSTUNREACH
			    : ENETUNREACH);
		}
#endif
	}
	error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
	    flags | ifa->ifa_flags, &nrt);
	if (m)
		(void) m_free(m);
	if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
		/* Announce the deletion, then drop the request's ref. */
		rt_newaddrmsg(cmd, ifa, error, nrt);
		if (use_routegenid)
			route_generation++;
		rtfree_locked(rt);
	}
	if (cmd == RTM_ADD && error == 0 && (rt = nrt)) {
		/*
		 * The request may have matched a route owned by another
		 * address; repoint it at ours (normal for p2p/loopback).
		 */
		if (rt->rt_ifa != ifa) {
			if (!(rt->rt_ifa->ifa_ifp->if_flags &
			    (IFF_POINTOPOINT|IFF_LOOPBACK)))
				printf("rtinit: wrong ifa (%p) was (%p)\n",
				    ifa, rt->rt_ifa);
			if (rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
			rtsetifa(rt, ifa);
			rt->rt_ifp = ifa->ifa_ifp;
			rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu;
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
		}
		rt_newaddrmsg(cmd, ifa, error, nrt);
		if (use_routegenid)
			route_generation++;
		/* Keep the route in the table; just drop our reference. */
		rtunref(rt);
	}
	return (error);
}
/*
 * Allocate a route entry from rte_zone, going through the auditing
 * allocator when debug mode is enabled.
 */
struct rtentry *
rte_alloc(void)
{
	return ((rte_debug & RTD_DEBUG) ?
	    rte_alloc_debug() : (struct rtentry *)zalloc(rte_zone));
}
/*
 * Return a route entry to rte_zone, going through the auditing path when
 * debug mode is enabled.  Panics if the entry is still referenced.
 */
void
rte_free(struct rtentry *p)
{
	if (rte_debug & RTD_DEBUG) {
		rte_free_debug(p);
		return;
	}

	if (p->rt_refcnt != 0)
		panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
	zfree(rte_zone, p);
}
/*
 * Debug allocator: zero the whole rtentry_dbg wrapper, optionally record
 * the allocating thread and backtrace (RTD_TRACE), and mark the entry
 * live with RTD_INUSE so later frees can detect corruption.
 */
static inline struct rtentry *
rte_alloc_debug(void)
{
	struct rtentry_dbg *rte;

	rte = ((struct rtentry_dbg *)zalloc(rte_zone));
	if (rte != NULL) {
		bzero(rte, sizeof (*rte));
		if (rte_debug & RTD_TRACE) {
			rte->rtd_alloc_thread = current_thread();
			(void) OSBacktrace(rte->rtd_alloc_stk_pc,
			    RTD_TRSTACK_SIZE);
		}
		rte->rtd_inuse = RTD_INUSE;
	}
	return ((struct rtentry *)rte);
}
/*
 * Debug free: catch double frees and corruption via rtd_inuse, snapshot
 * the entry into rtd_entry_saved before scrubbing it, optionally record
 * the freeing thread/backtrace (RTD_TRACE), and — unless RTD_NO_FREE is
 * set to keep freed entries around for inspection — return the memory to
 * the zone.
 */
static inline void
rte_free_debug(struct rtentry *p)
{
	struct rtentry_dbg *rte = (struct rtentry_dbg *)p;

	if (p->rt_refcnt != 0)
		panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);

	if (rte->rtd_inuse == RTD_FREED)
		panic("rte_free: double free rte=%p\n", rte);
	else if (rte->rtd_inuse != RTD_INUSE)
		panic("rte_free: corrupted rte=%p\n", rte);

	/* Preserve the final contents for post-mortem debugging. */
	bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
	bzero((caddr_t)p, sizeof (*p));

	rte->rtd_inuse = RTD_FREED;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_free_thread = current_thread();
		(void) OSBacktrace(rte->rtd_free_stk_pc, RTD_TRSTACK_SIZE);
	}

	if (!(rte_debug & RTD_NO_FREE))
		zfree(rte_zone, p);
}