[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

next step on the way to real policy routing



we're getting closer to having real policy routing - this diff gives us 
multiple routing tables; however, they're not yet accessible from userland 
(aka route(8)) or being used for routing. nonetheless, this diff 
needs testing and review. please do so.

this is important, and it is important that we're reasonably fast, and 
I need help. I can't make such big changes in one of the most twisted 
parts of the kernel without getting help with testing and review.

this diff does break netstat -r; this is being worked on. there might 
be more userland utils (I doubt that, though) that try to snoop the routing 
table via kvm; those will not work any more.

Index: route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.70
diff -u -p -r1.70 route.c
--- route.c	22 Mar 2006 14:37:44 -0000	1.70
+++ route.c	30 Mar 2006 11:19:44 -0000
@@ -130,9 +130,12 @@ struct ifaddr	*encap_findgwifa(struct so
 
 #define	SA(p) ((struct sockaddr *)(p))
 
-struct	route_cb	 route_cb;
-struct	rtstat		 rtstat;
-struct	radix_node_head	*rt_tables[AF_MAX+1];
+struct	route_cb	   route_cb;
+struct	rtstat		   rtstat;
+struct	radix_node_head	***rt_tables;
+u_int8_t		   af2rtafidx[AF_MAX+1];
+u_int8_t		   rtafidx_max;
+u_int			   rtbl_cnt = 0;
 
 int			rttrash;	/* routes not in table but not freed */
 struct sockaddr		wildcard;	/* zero cookie for wildcard searches */
@@ -140,6 +143,8 @@ struct sockaddr		wildcard;	/* zero cooki
 struct pool		rtentry_pool;	/* pool for rtentry structures */
 struct pool		rttimer_pool;	/* pool for rttimer structures */
 
+int	rtable_init(struct radix_node_head ***);
+int	rtable_add(char *);
 int	okaytoclone(u_int, int);
 int	rtdeletemsg(struct rtentry *);
 int	rtflushclone1(struct radix_node *, void *);
@@ -147,6 +152,7 @@ void	rtflushclone(struct radix_node_head
 int	rt_if_remove_rtdelete(struct radix_node *, void *);
 
 #define	LABELID_MAX	50000
+#define	RTBL_CNT_INC	4	/* allocate rtables in chunks of 4 */
 
 struct rt_label {
 	TAILQ_ENTRY(rt_label)	rtl_entry;
@@ -165,14 +171,45 @@ encap_findgwifa(struct sockaddr *gw)
 }
 #endif
 
-void
-rtable_init(void **table)
+/*
+ * Allocate the per-address-family radix heads for one routing table and
+ * return the array through *table.  Families without a dom_rtattach hook
+ * keep af2rtafidx[] == 0, so slot 0 of the array is reserved and left
+ * NULL; attached families are numbered from 1.
+ * Returns 0 on success, -1 if the array cannot be allocated.
+ */
+int
+rtable_init(struct radix_node_head ***table)
 {
-	struct domain *dom;
+	void		**p;
+	struct domain	 *dom;
+	u_int8_t	  i;
+
+	bzero(af2rtafidx, sizeof(af2rtafidx));
+	rtafidx_max = 0;
+	i = 1;		/* index 0 is the reserved "no table" slot */
+
+	/* 1st pass: find out how many tables to allocate */
 	for (dom = domains; dom != NULL; dom = dom->dom_next)
 		if (dom->dom_rtattach)
-			dom->dom_rtattach(&table[dom->dom_family],
+			rtafidx_max++;
+
+	/* one extra slot for the reserved NULL entry at index 0 */
+	if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
+	    M_NOWAIT)) == NULL)
+		return (-1);
+	bzero(p, sizeof(void *) * (rtafidx_max + 1));
+
+	/* 2nd pass: attach */
+	for (dom = domains; dom != NULL; dom = dom->dom_next)
+		if (dom->dom_rtattach) {
+			af2rtafidx[dom->dom_family] = i++;
+			dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
 			    dom->dom_rtoffset);
+		}
+
+	*table = (struct radix_node_head **)p;
+	return (0);
 }
 
 void
@@ -181,7 +208,46 @@ route_init()
 	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
 	    NULL);
 	rn_init();	/* initialize all zeroes, all ones, mask table */
-	rtable_init((void **)rt_tables);
+	if (rtable_add("main") == -1)
+		panic("route_init rtable_add");
+}
+
+/*
+ * Create a new routing table in the first free slot of rt_tables,
+ * growing the slot array by RTBL_CNT_INC entries when it is full.
+ * tblname is currently unused.
+ * Returns the result of rtable_init(), or -1 if the array cannot grow.
+ */
+int
+rtable_add(char *tblname)
+{
+	u_int	 i, newcnt;
+	void	*p;
+
+	/* look for a free slot */
+	for (i = 0; i < rtbl_cnt; i++)
+		if (rt_tables[i] == NULL)
+			break;
+
+	if (i == rtbl_cnt) {
+		/*
+		 * Only commit the new size once the allocation succeeded,
+		 * otherwise rtbl_cnt would describe slots that do not exist.
+		 */
+		newcnt = rtbl_cnt + RTBL_CNT_INC;
+		if ((p = malloc(sizeof(void *) * newcnt, M_RTABLE,
+		    M_NOWAIT)) == NULL)
+			return (-1);
+		bzero(p, sizeof(void *) * newcnt);
+		if (i > 0) {
+			bcopy(rt_tables, p, sizeof(void *) * i);
+			free(rt_tables, M_RTABLE);
+		}
+		rt_tables = p;
+		rtbl_cnt = newcnt;
+	}
+
+	return (rtable_init(&rt_tables[i]));
 }
 
 void
@@ -205,13 +259,14 @@ okaytoclone(u_int flags, int howstrict)
 struct rtentry *
 rtalloc2(struct sockaddr *dst, int report, int howstrict)
 {
-	struct radix_node_head	*rnh = rt_tables[dst->sa_family];
+	struct radix_node_head	*rnh;
 	struct rtentry		*rt;
 	struct radix_node	*rn;
 	struct rtentry		*newrt = 0;
 	struct rt_addrinfo	 info;
 	int			 s = splnet(), err = 0, msgtype = RTM_MISS;
 
+	rnh = rt_gettable(dst->sa_family, 0);
 	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
 		newrt = rt = (struct rtentry *)rn;
@@ -257,13 +312,14 @@ rtalloc(struct route *ro)
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report)
 {
-	struct radix_node_head	*rnh = rt_tables[dst->sa_family];
+	struct radix_node_head	*rnh;
 	struct rtentry		*rt;
 	struct radix_node	*rn;
 	struct rtentry		*newrt = 0;
 	struct rt_addrinfo	 info;
 	int			 s = splsoftnet(), err = 0, msgtype = RTM_MISS;
 
+	rnh = rt_gettable(dst->sa_family, 0);
 	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
 		newrt = rt = (struct rtentry *)rn;
@@ -653,7 +709,7 @@ rtrequest1(int req, struct rt_addrinfo *
 	struct sockaddr_rtlabel	*sa_rl;
 #define senderr(x) { error = x ; goto bad; }
 
-	if ((rnh = rt_tables[info->rti_info[RTAX_DST]->sa_family]) == 0)
+	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, 0)) == NULL)
 		senderr(EAFNOSUPPORT);
 	if (info->rti_flags & RTF_HOST)
 		info->rti_info[RTAX_NETMASK] = NULL;
@@ -1123,10 +1179,15 @@ rt_timer_add(struct rtentry *rt, void (*
 }
 
+/*
+ * Return the radix head for address family af in routing table id, or
+ * NULL if the table does not exist or af is outside af2rtafidx[].
+ */
 struct radix_node_head *
-rt_gettable(sa_family_t af, int id)
+rt_gettable(sa_family_t af, u_int id)
 {
-	/* ignore id for now */
-	return (rt_tables[af]);
+	if (id >= rtbl_cnt || rt_tables[id] == NULL || af > AF_MAX)
+		return (NULL);
+	return (rt_tables[id][af2rtafidx[af]]);
 }
 
 struct radix_node *
@@ -1138,6 +1193,7 @@ rt_lookup(struct sockaddr *dst, struct s
 
 	return (rnh->rnh_lookup(dst, mask, rnh));
 }
+
 /* ARGSUSED */
 void
 rt_timer_timer(void *arg)
@@ -1254,13 +1310,11 @@ rt_if_remove(struct ifnet *ifp)
 	int			 i;
 	struct radix_node_head	*rnh;
 
-	for (i = 1; i <= AF_MAX; i++) {
-		rnh = rt_tables[i];
-		if (rnh)
+	for (i = 1; i <= AF_MAX; i++)
+		if ((rnh = rt_gettable(i, 0)) != NULL)
 			while ((*rnh->rnh_walktree)(rnh,
 			    rt_if_remove_rtdelete, ifp) == EAGAIN)
-				;
-	}
+				;	/* nothing */
 }
 
 /*
Index: route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.38
diff -u -p -r1.38 route.h
--- route.h	30 Mar 2006 09:53:43 -0000	1.38
+++ route.h	30 Mar 2006 11:19:44 -0000
@@ -334,7 +334,6 @@ void	 rt_timer_queue_destroy(struct rtti
 void	 rt_timer_remove_all(struct rtentry *);
 unsigned long	rt_timer_count(struct rttimer_queue *);
 void	 rt_timer_timer(void *);
-void	 rtable_init(void **);
 void	 rtalloc(struct route *);
 struct rtentry *
 	 rtalloc1(struct sockaddr *, int);
@@ -354,7 +353,7 @@ int	 rtrequest(int, struct sockaddr *,
 int	 rtrequest1(int, struct rt_addrinfo *, struct rtentry **);
 void	 rt_if_remove(struct ifnet *);
 
-struct radix_node_head	*rt_gettable(sa_family_t, int);
+struct radix_node_head	*rt_gettable(sa_family_t, u_int);
 struct radix_node	*rt_lookup(struct sockaddr *, struct sockaddr *, int);
 #endif /* _KERNEL */
 #endif /* _NET_ROUTE_H_ */