[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bgpd: fd passing to solve listener reconfig at runtime



as announced with the last diff, here comes file descriptor passing in
bgpd.
now listeners can be reconfigured in every imaginable manner at runtime
- the privileged parent process does the bind() and passes the fd to
the unprivileged session engine process, which then starts to listen().

to test, start bgpd, change the addresses it should listen on ("listen 
on 1.2.3.4" in bgpd.conf) and reload the config - pester it with ugly 
corner cases ;)

Index: bgpd.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.95
diff -u -r1.95 bgpd.c
--- bgpd.c	6 Jun 2004 17:38:10 -0000	1.95
+++ bgpd.c	18 Jun 2004 03:53:25 -0000
@@ -176,22 +176,24 @@
 
 	log_info("startup");
 
-	if (pipe(pipe_m2s) == -1)
-		fatal("pipe");
+	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_m2s) == -1)
+		fatal("socketpair");
 	if (fcntl(pipe_m2s[0], F_SETFL, O_NONBLOCK) == -1 ||
 	    fcntl(pipe_m2s[1], F_SETFL, O_NONBLOCK) == -1)
 		fatal("fcntl");
-	if (pipe(pipe_m2r) == -1)
-		fatal("pipe");
+	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_m2r) == -1)
+		fatal("socketpair");
 	if (fcntl(pipe_m2r[0], F_SETFL, O_NONBLOCK) == -1 ||
 	    fcntl(pipe_m2r[1], F_SETFL, O_NONBLOCK) == -1)
 		fatal("fcntl");
-	if (pipe(pipe_s2r) == -1)
-		fatal("pipe");
+	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pipe_s2r) == -1)
+		fatal("socketpair");
 	if (fcntl(pipe_s2r[0], F_SETFL, O_NONBLOCK) == -1 ||
 	    fcntl(pipe_s2r[1], F_SETFL, O_NONBLOCK) == -1)
 		fatal("fcntl");
 
+	prepare_listeners(&conf);
+
 	/* fork children */
 	rde_pid = rde_main(&conf, &net_l, rules_l, &mrt_l, pipe_m2r, pipe_s2r);
 	io_pid = session_main(&conf, peer_l, &net_l, rules_l, &mrt_l,
@@ -382,6 +384,8 @@
 		return (-1);
 	}
 
+	prepare_listeners(conf);
+
 	if (imsg_compose(&ibuf_se, IMSG_RECONF_CONF, 0,
 	    conf, sizeof(struct bgpd_config)) == -1)
 		return (-1);
@@ -407,7 +411,7 @@
 		free(r);
 	}
 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
-		if (imsg_compose(&ibuf_se, IMSG_RECONF_LISTENER, 0,
+		if (imsg_compose_fdpass(&ibuf_se, IMSG_RECONF_LISTENER, la->fd,
 		    la, sizeof(struct listen_addr)) == -1)
 			return (-1);
 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
Index: bgpd.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.126
diff -u -r1.126 bgpd.h
--- bgpd.h	6 Jun 2004 17:38:10 -0000	1.126
+++ bgpd.h	18 Jun 2004 03:53:25 -0000
@@ -83,6 +83,7 @@
 	ssize_t			 size;
 	ssize_t			 wpos;
 	ssize_t			 rpos;
+	int			 fd;
 };
 
 struct msgbuf {
@@ -109,6 +110,7 @@
 };
 
 #define DEFAULT_LISTENER	0x01
+#define LISTENER_LISTENING	0x02
 
 struct listen_addr {
 	TAILQ_ENTRY(listen_addr)	 entry;
@@ -238,11 +240,17 @@
 #define	IMSG_HEADER_SIZE	sizeof(struct imsg_hdr)
 #define	MAX_IMSGSIZE		8192
 
-struct imsgbuf {
+struct imsg_fd {
+	TAILQ_ENTRY(imsg_fd)	entry;
 	int			fd;
-	pid_t			pid;
-	struct buf_read		r;
-	struct msgbuf		w;
+};
+
+struct imsgbuf {
+	int				fd;
+	pid_t				pid;
+	TAILQ_HEAD(fds, imsg_fd)	fds;	
+	struct buf_read			r;
+	struct msgbuf			w;
 };
 
 enum imsg_type {
@@ -565,11 +573,13 @@
 int	 imsg_get(struct imsgbuf *, struct imsg *);
 int	 imsg_compose(struct imsgbuf *, int, u_int32_t, void *, u_int16_t);
 int	 imsg_compose_pid(struct imsgbuf *, int, pid_t, void *, u_int16_t);
+int	 imsg_compose_fdpass(struct imsgbuf *, int, int, void *, u_int16_t);
 struct buf *imsg_create(struct imsgbuf *, int, u_int32_t, u_int16_t);
 struct buf *imsg_create_pid(struct imsgbuf *, int, pid_t, u_int16_t);
 int	 imsg_add(struct buf *, void *, u_int16_t);
 int	 imsg_close(struct imsgbuf *, struct buf *);
 void	 imsg_free(struct imsg *);
+int	 imsg_get_fd(struct imsgbuf *);
 
 /* kroute.c */
 int	kr_init(int);
Index: buffer.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/buffer.c,v
retrieving revision 1.18
diff -u -r1.18 buffer.c
--- buffer.c	29 Apr 2004 19:56:04 -0000	1.18
+++ buffer.c	18 Jun 2004 03:53:25 -0000
@@ -45,6 +45,7 @@
 		return (NULL);
 	}
 	buf->size = len;
+	buf->fd = -1;
 
 	return (buf);
 }
@@ -142,17 +143,36 @@
 	struct buf	*buf, *next;
 	int		 i = 0;
 	ssize_t		 n;
+	struct msghdr	 msg;
+	struct cmsghdr	*cmsg;
+	char		 cmsgbuf[CMSG_SPACE(sizeof(int))];
 
 	bzero(&iov, sizeof(iov));
+	bzero(&msg, sizeof(msg));
 	TAILQ_FOREACH(buf, &msgbuf->bufs, entries) {
 		if (i >= IOV_MAX)
 			break;
 		iov[i].iov_base = buf->buf + buf->rpos;
 		iov[i].iov_len = buf->size - buf->rpos;
 		i++;
+		if (buf->fd != -1)
+			break;
 	}
 
-	if ((n = writev(msgbuf->fd, iov, i)) == -1) {
+	msg.msg_iov = iov;
+	msg.msg_iovlen = i;
+
+	if (buf != NULL && buf->fd != -1) {
+		msg.msg_control = (caddr_t)cmsgbuf;
+		msg.msg_controllen = CMSG_LEN(sizeof(int));
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		*(int *)CMSG_DATA(cmsg) = buf->fd;
+	}
+
+	if ((n = sendmsg(msgbuf->fd, &msg, 0)) == -1) {
 		if (errno == EAGAIN)	/* cannot write immediately */
 			return (0);
 		else
@@ -163,6 +183,15 @@
 		errno = 0;
 		return (-2);
 	}
+
+	/*
+	 * sendmsg() did not fail, so a descriptor attached to this buffer has
+	 * been handed over; drop our copy and clear it so it is not re-sent.
+	 */
+	if (buf != NULL && buf->fd != -1) {
+		close(buf->fd);
+		buf->fd = -1;
+	}
 
 	for (buf = TAILQ_FIRST(&msgbuf->bufs); buf != NULL && n > 0;
 	    buf = next) {
Index: config.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrieving revision 1.38
diff -u -r1.38 config.c
--- config.c	6 Jun 2004 17:38:10 -0000	1.38
+++ config.c	18 Jun 2004 03:53:25 -0000
@@ -224,3 +224,51 @@
 
 	return (0);
 }
+
+void
+prepare_listeners(struct bgpd_config *conf)
+{
+	struct listen_addr	*la;
+	int			 opt = 1;
+
+	if (TAILQ_EMPTY(conf->listen_addrs)) {
+		if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
+			fatal("setup_listeners calloc");
+		la->fd = -1;
+		la->flags = DEFAULT_LISTENER;
+		la->sa.ss_len = sizeof(struct sockaddr_in);
+		((struct sockaddr_in *)&la->sa)->sin_family = AF_INET;
+		((struct sockaddr_in *)&la->sa)->sin_addr.s_addr =
+		    htonl(INADDR_ANY);
+		((struct sockaddr_in *)&la->sa)->sin_port = htons(BGP_PORT);
+		TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
+
+		if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
+			fatal("setup_listeners calloc");
+		la->fd = -1;
+		la->flags = DEFAULT_LISTENER;
+		la->sa.ss_len = sizeof(struct sockaddr_in6);
+		((struct sockaddr_in6 *)&la->sa)->sin6_family = AF_INET6;
+		((struct sockaddr_in6 *)&la->sa)->sin6_port = htons(BGP_PORT);
+		TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
+	}
+
+	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
+		if ((la->fd = socket(la->sa.ss_family, SOCK_STREAM,
+		    IPPROTO_TCP)) == -1)
+			fatal("socket");
+
+		opt = 1;
+		if (setsockopt(la->fd, SOL_SOCKET, SO_REUSEPORT,
+		    &opt, sizeof(opt)) == -1)
+			fatal("setsockopt SO_REUSEPORT");
+
+		if (bind(la->fd, (struct sockaddr *)&la->sa, la->sa.ss_len) ==
+		    -1) {
+			log_warn("cannot bind to %s",
+			    log_sockaddr((struct sockaddr *)&la->sa));
+			close(la->fd);
+			la->fd = -1;
+		}
+	}
+}
Index: imsg.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/imsg.c,v
retrieving revision 1.25
diff -u -r1.25 imsg.c
--- imsg.c	29 Apr 2004 19:56:04 -0000	1.25
+++ imsg.c	18 Jun 2004 03:53:25 -0000
@@ -17,6 +17,7 @@
  */
 
 #include <sys/types.h>
+#include <sys/uio.h>
 
 #include <errno.h>
 #include <stdlib.h>
@@ -26,7 +27,7 @@
 #include "bgpd.h"
 
 int		 imsg_compose_core(struct imsgbuf *, int, u_int32_t, void *,
-		     u_int16_t, pid_t);
+		     u_int16_t, pid_t, int);
 struct buf	*imsg_create_core(struct imsgbuf *, int, u_int32_t, u_int16_t,
 		     pid_t);
 
@@ -38,15 +39,29 @@
 	ibuf->fd = fd;
 	ibuf->w.fd = fd;
 	ibuf->pid = getpid();
+	TAILQ_INIT(&ibuf->fds);
 }
 
 int
 imsg_read(struct imsgbuf *ibuf)
 {
+	struct msghdr		 msg;
+	struct cmsghdr		*cmsg;
+	char			 cmsgbuf[CMSG_SPACE(sizeof(int))];
+	struct iovec		 iov;
 	ssize_t			 n;
+	int			 fd;
+	struct imsg_fd		*ifd;
 
-	if ((n = read(ibuf->fd, ibuf->r.buf + ibuf->r.wpos,
-	    sizeof(ibuf->r.buf) - ibuf->r.wpos)) == -1) {
+	bzero(&msg, sizeof(msg));
+	iov.iov_base = ibuf->r.buf + ibuf->r.wpos;
+	iov.iov_len = sizeof(ibuf->r.buf) - ibuf->r.wpos;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+
+	if ((n = recvmsg(ibuf->fd, &msg, 0)) == -1) {
 		if (errno != EINTR && errno != EAGAIN) {
 			log_warn("imsg_read: pipe read error");
 			return (-1);
@@ -56,6 +71,19 @@
 
 	ibuf->r.wpos += n;
 
+	if ((cmsg = CMSG_FIRSTHDR(&msg)) != NULL) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_RIGHTS) {
+			fd = (*(int *)CMSG_DATA(cmsg));
+			if ((ifd = calloc(1, sizeof(struct imsg_fd))) == NULL)
+				fatal("imsg_read calloc");
+			ifd->fd = fd;
+			TAILQ_INSERT_TAIL(&ibuf->fds, ifd, entry);
+		} else
+			log_warn("imsg_read: got unexpected ctl data level %d "
+			    "type %d", cmsg->cmsg_level, cmsg->cmsg_type);
+	}
+
 	return (n);
 }
 
@@ -98,21 +126,29 @@
 
 int
 imsg_compose(struct imsgbuf *ibuf, int type, u_int32_t peerid, void *data,
-    u_int16_t dlen)
+    u_int16_t datalen)
 {
-	return (imsg_compose_core(ibuf, type, peerid, data, dlen, ibuf->pid));
+	return (imsg_compose_core(ibuf, type, peerid, data, datalen,
+	    ibuf->pid, -1));
 }
 
 int
 imsg_compose_pid(struct imsgbuf *ibuf, int type, pid_t pid, void *data,
     u_int16_t datalen)
 {
-	return (imsg_compose_core(ibuf, type, 0, data, datalen, pid));
+	return (imsg_compose_core(ibuf, type, 0, data, datalen, pid, -1));
+}
+
+int
+imsg_compose_fdpass(struct imsgbuf *ibuf, int type, int fd, void *data,
+    u_int16_t datalen)
+{
+	return (imsg_compose_core(ibuf, type, 0, data, datalen, ibuf->pid, fd));
 }
 
 int
 imsg_compose_core(struct imsgbuf *ibuf, int type, u_int32_t peerid, void *data,
-    u_int16_t datalen, pid_t pid)
+    u_int16_t datalen, pid_t pid, int fd)
 {
 	struct buf	*wbuf;
 	struct imsg_hdr	 hdr;
@@ -139,6 +175,8 @@
 			return (-1);
 		}
 
+	wbuf->fd = fd;
+
 	if ((n = buf_close(&ibuf->w, wbuf)) < 0) {
 			log_warnx("imsg_compose: buf_add error");
 			buf_free(wbuf);
@@ -212,4 +250,20 @@
 imsg_free(struct imsg *imsg)
 {
 	free(imsg->data);
+}
+
+int
+imsg_get_fd(struct imsgbuf *ibuf)
+{
+	int		 fd;
+	struct imsg_fd	*ifd;
+
+	if ((ifd = TAILQ_FIRST(&ibuf->fds)) == NULL)
+		return (-1);
+
+	fd = ifd->fd;
+	TAILQ_REMOVE(&ibuf->fds, ifd, entry);
+	free(ifd);
+
+	return(fd);
 }
Index: session.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.174
diff -u -r1.174 session.c
--- session.c	9 Jun 2004 13:01:44 -0000	1.174
+++ session.c	18 Jun 2004 03:53:26 -0000
@@ -114,38 +114,18 @@
 	struct listen_addr	*la;
 	u_int			 cnt = 0;
 
-	if (TAILQ_EMPTY(conf->listen_addrs)) {
-		if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
-			fatal("setup_listeners calloc");
-		la->fd = -1;
-		la->flags = DEFAULT_LISTENER;
-		la->sa.ss_len = sizeof(struct sockaddr_in);
-		((struct sockaddr_in *)&la->sa)->sin_family = AF_INET;
-		((struct sockaddr_in *)&la->sa)->sin_addr.s_addr =
-		    htonl(INADDR_ANY);
-		((struct sockaddr_in *)&la->sa)->sin_port = htons(BGP_PORT);
-		TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
-
-		if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
-			fatal("setup_listeners calloc");
-		la->fd = -1;
-		la->flags = DEFAULT_LISTENER;
-		la->sa.ss_len = sizeof(struct sockaddr_in6);
-		((struct sockaddr_in6 *)&la->sa)->sin6_family = AF_INET6;
-		((struct sockaddr_in6 *)&la->sa)->sin6_port = htons(BGP_PORT);
-		TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
-	}
-
 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
 		la->reconf = RECONF_NONE;
 		cnt++;
 
-		if (la->fd != -1)
+		if (la->flags & LISTENER_LISTENING)
 			continue;
 
-		if ((la->fd = socket(la->sa.ss_family, SOCK_STREAM,
-		    IPPROTO_TCP)) == -1)
-			fatal("socket");
+		if (la->fd == -1) {
+			log_warn("cannot establish listener on %s: invalid fd",
+			    log_sockaddr((struct sockaddr *)&la->sa));
+			continue;
+		}
 
 		opt = 1;
 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
@@ -157,18 +137,6 @@
 				fatal("setsockopt TCP_MD5SIG");
 		}
 
-		if (bind(la->fd, (struct sockaddr *)&la->sa, la->sa.ss_len) ==
-		    -1) {
-			if (errno == EACCES) {
-				log_warnx("can't establish listener on %s",
-				    log_sockaddr((struct sockaddr *)&la->sa));
-				close(la->fd);
-				la->fd = -1;
-				return (-1);
-			} else
-				fatal("bind");
-		}
-
 		session_socket_blockmode(la->fd, BM_NONBLOCK);
 
 		if (listen(la->fd, MAX_BACKLOG)) {
@@ -176,6 +144,8 @@
 			fatal("listen");
 		}
 
+		la->flags |= LISTENER_LISTENING;
+
 		log_info("listening on %s",
 		    log_sockaddr((struct sockaddr *)&la->sa));
 	}
@@ -234,9 +204,6 @@
 	setproctitle("session engine");
 	bgpd_process = PROC_SE;
 
-	listener_cnt = 0;
-	setup_listeners(&listener_cnt);
-
 	if (pfkey_init(&sysdep) == -1)
 		fatalx("pfkey setup failed");
 
@@ -247,6 +214,9 @@
 
 	endpwent();
 
+	listener_cnt = 0;
+	setup_listeners(&listener_cnt);
+
 	signal(SIGTERM, session_sighdlr);
 	signal(SIGINT, session_sighdlr);
 	signal(SIGPIPE, SIG_IGN);
@@ -2066,18 +2036,25 @@
 					break;
 			}
 
+			if ((nla->fd = imsg_get_fd(ibuf)) == -1)
+				log_warnx("expected to receive fd for %s "
+				    "but didn't receive any",
+				    log_sockaddr((struct sockaddr *)&nla->sa));
+
 			if (la == NULL) {
 				la = calloc(1, sizeof(struct listen_addr));
 				if (la == NULL)
 					fatal(NULL);
 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
-				la->fd = nla->fd;
 				la->flags = nla->flags;
+				la->fd = nla->fd;
 				la->reconf = RECONF_REINIT;
 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
 				    entry);
 			} else {
 				la->reconf = RECONF_KEEP;
+				shutdown(nla->fd, SHUT_RDWR);
+				close(nla->fd);
 			}
 			break;
 		case IMSG_RECONF_DONE:
@@ -2118,6 +2095,7 @@
 					    (struct sockaddr *)&la->sa));
 					TAILQ_REMOVE(conf->listen_addrs, la,
 					    entry);
+					shutdown(la->fd, SHUT_RDWR);
 					close(la->fd);
 					free(la);
 				}
Index: session.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.55
diff -u -r1.55 session.h
--- session.h	9 Jun 2004 13:01:44 -0000	1.55
+++ session.h	18 Jun 2004 03:53:26 -0000
@@ -210,6 +210,7 @@
 /* config.c */
 int	 merge_config(struct bgpd_config *, struct bgpd_config *,
 	    struct peer *, struct listen_addrs *);
+void	 prepare_listeners(struct bgpd_config *);
 
 /* rde.c */
 int	 rde_main(struct bgpd_config *, struct network_head *,