/* bvirt.cc * Code relating to virtual network namesapces. * * Copyright 2009 Benjamin C. R. LaHaise, All Rights Reserved. * Permission is hereby granted to copy under the terms of the GPLv2 * or later. See the file LICENSE for details. */ #include #include #include #include #include #include #include #include #include "babd.h" #include "ctrlfd.h" #include "bvirt_ns.h" #include "netlink.h" #include "compat.h" #if 1 /* I hate glibc */ #include static int gettid(void) { return syscall(SYS_gettid); } #endif #define NR_NAMESPACES 64 static bvirt_ns_t *namespaces[NR_NAMESPACES]; bvirt_ns_t *lookup_bvirt_ns(const char *name) { unsigned i; for (i=0; im_name, name)) return namespaces[i]; return NULL; } int bvirt_ns_t::sendrpc(bvirt_ns_rpc2_t func, void *arg1, void *arg2, int sync) { struct bvirt_ns_rpc rpc; int sync_fds[2]; memset(&rpc, 0, sizeof(rpc)); rpc.func = func; rpc.arg1 = arg1; rpc.arg2 = arg2; rpc.reply_fd = -1; if (sync) { if (cloexec_pipe(sync_fds)) { perror("bvirt_ns_t::sendrpc - cloexec_pipe"); return -1; } rpc.reply_fd = sync_fds[1]; } if (sizeof(rpc) != write(m_pipefds[1], &rpc, sizeof rpc)) return -errno; if (sync) { int ret = 0; if (sizeof(ret) != read(sync_fds[0], &ret, sizeof(ret))) { perror("bvirt_ns_t::sendrpc: read"); ret = -1; } close(sync_fds[1]); close(sync_fds[0]); return ret; } return 0; } static void bvirt_usage(ctrlfd_t *cfd) { cfd->printf("Usage:\n"); cfd->printf(" bvirt list [-v]\n"); cfd->printf(" bvirt add \n"); cfd->printf(" bvirt del \n"); cfd->printf(" bvirt gettid \n"); cfd->printf(" bvirt iface-virt-realm \n"); cfd->printf(" bvirt del-iface-virt-realm \n"); cfd->printf(" bvirt system \n"); cfd->done(-1); } void *bvirt_ns_t::start(void) { m_cfd->printf("alive in bvirt_ns_t::start()\n"); m_cfd->printf("pid=%d tid=%d\n", (int)getpid(), gettid()); for (;;) { struct bvirt_ns_rpc rpc; long size = read(m_pipefds[0], &rpc, sizeof(rpc)); if (!size) break; if (size < 0) perror("bvirt_ns_t::start/rpc read error\n"); if (size != sizeof(rpc)) *(char *)0 = 0; int ret = (this->*rpc.func)(rpc.arg1, rpc.arg2); if (rpc.reply_fd != -1) { ret = write(rpc.reply_fd, &ret, sizeof(ret)); if (ret != sizeof(ret)) perror("bvirt_ns_t::start - write"); } } return NULL; } static void *bvirt_ns_thread(void *arg) { bvirt_ns_t *ns = (bvirt_ns_t *)arg; return ns->start(); } int bvirt_ns_t::rpc_init_rpc(void *arg1, void *arg2) { if (unshare(CLONE_NEWNET)) { m_cfd->perror("unshare(CLONE_NEWNET)"); m_cfd->printf("virt '%s' not created\n", m_name); m_cfd->done(-1); return -1; } m_tid = gettid(); m_b_sock_fd = socket(AF_INET, SOCK_STREAM, 0); if (m_b_sock_fd == -1) { m_cfd->perror("socket"); m_cfd->printf("virt '%s' not created\n", m_name); m_cfd->done(-1); return -1; } m_cfd->done(0); m_cfd = NULL; return 0; } bvirt_ns_t::bvirt_ns_t(ctrlfd_t *cfd, const char *new_name) { m_name = strdup(new_name); m_index = -1; m_cfd = cfd; m_tid = 0; m_nr_ifaces = 0; for (unsigned i=0; iprintf("out of space in namespace table\n"); goto out_err; } if (cloexec_pipe(m_pipefds)) { cfd->perror("cloexec_pipe"); goto out_err; } if (pthread_create(&m_pthread, NULL, bvirt_ns_thread, this)) { cfd->perror("pthread_create:\n"); goto out_err; } /* Make sure early init is completed synchronously. */ if (this->sendrpc(&bvirt_ns_t::rpc_init_rpc, NULL, NULL, 1)) { delete this; return; } m_netlink = new bnetlink_t(this); char tmp[256]; sprintf(tmp, "/etc/babylon/bvirt-up-host %s %d", new_name, m_tid); system(tmp); //FIXME: error checking sprintf(tmp, "/etc/babylon/bvirt-up-guest %s %d", new_name, m_tid); this->sendrpc(&bvirt_ns_t::system_rpc, NULL, strdup(tmp), 1); return; out_err: m_cfd->printf("namespace '%s' not added\n", m_name); m_cfd->done(-1); m_cfd = NULL; delete this; return; } bvirt_ns_t::~bvirt_ns_t() { if (m_index >= 0) namespaces[m_index] = NULL; m_index = -1; free(m_name); m_name = NULL; } static void do_bvirt_add(ctrlfd_t *cfd, char *name, char *str) { #if 0 int pid = atoi(str); if (pid <= 0) { cfd->printf("bvirt add: bad pid '%s'\n", str); return cfd->done(-1); } if (0 != kill(pid, 0)) { cfd->perror("bvirt add"); return cfd->done(-2); } #endif if (*str) return bvirt_usage(cfd); bvirt_ns_t *ns = lookup_bvirt_ns(name); if (ns) { cfd->printf("namespace '%s' already exists\n", name); cfd->done(-1); return; } ns = new bvirt_ns_t(cfd, name); } static void do_bvirt_del(ctrlfd_t *cfd, char *name, char *str) { if (cfd) { cfd->printf("bvirt del currently not supported\n"); cfd->done(-1); return; } if (*str) return bvirt_usage(cfd); bvirt_ns_t *ns = lookup_bvirt_ns(name); if (!ns) { cfd->printf("unable to find namespace '%s'\n", name); cfd->done(-2); return; } delete ns; cfd->done(0); } static void do_bvirt_gettid(ctrlfd_t *cfd, char *name) { bvirt_ns_t *ns = lookup_bvirt_ns(name); if (!ns) { cfd->printf("unable to find namespace '%s'\n", name); cfd->done(-2); return; } cfd->printf("%d\n", ns->m_tid); cfd->done(0); } static void show_bvirt_list(ctrlfd_t *cfd, int verbose) { unsigned nr = 0; for (unsigned i = 0; iprintf("bvirt add pid=%d nr_ifaces=%d %s\n", ns->m_tid, ns->m_nr_ifaces, ns->m_name); else cfd->printf("bvirt add %s\n", ns->m_name); } if (nr) cfd->my_putchar('\n'); } static void do_bvirt_list(ctrlfd_t *cfd, char *str) { int verbose = 0; cfd->printf("list[%s]\n", str); if (*str && !strcmp(str, " -v")) verbose = 1; else if (*str) return bvirt_usage(cfd); show_bvirt_list(cfd, verbose); cfd->done(0); } int bvirt_ns_t::system_rpc(void *arg1, void *arg2) { //ctrlfd_t *cfd = (ctrlfd_t *)arg1; char *str = (char *)arg2; int ret = system(str); if (-1 == ret) perror("system"); free(str); return ret; } static void do_bvirt_system(ctrlfd_t *cfd, char *name, char *str) { bvirt_ns_t *ns = lookup_bvirt_ns(name); if (!ns && strcmp(name, "none")) { cfd->printf("unable to find namespace '%s'\n", name); cfd->done(-1); return; } int ret; if (!ns) ret = system(str); else ret = ns->sendrpc(&bvirt_ns_t::system_rpc, cfd, strdup(str), 1); if (WIFEXITED(ret)) cfd->done(-1-WEXITSTATUS(ret)); else cfd->done(-127); } static struct realm_mapping { char *domain; int nr_netns; #define MAX_REALM_MAPPING_NS 8 bvirt_ns_t *netns[MAX_REALM_MAPPING_NS]; } realm_mappings[32]; static int nr_realm_mappings; void bvirt_show_running_global_config(ctrlfd_t *cfd) { show_bvirt_list(cfd, 0); for (int i=0; iprintf("bvirt iface-virt-realm %s %s\n", realm_mappings[i].netns[j]->m_name, realm_mappings[i].domain); } } } bvirt_ns_t *realm_mapping_select_ns(struct realm_mapping *mapping) { bvirt_ns_t *selected = mapping->netns[0]; for (int i=1; inetns[i]; if (!cur) continue; if (!selected || cur->m_nr_ifaces < selected->m_nr_ifaces) selected = cur; } return selected; } bvirt_ns_t *lookup_bvirt_iface_virt_realm(const char *domain, int *idxp) { for (int i=0; iprintf("unable to find namespace '%s'\n", name); cfd->done(-1); return; } if (*str != ' ') { cfd->printf("missing domain name for virtual realm.\n"); cfd->done(-2); return; } str++; int idx; bvirt_ns_t *old_ns = lookup_bvirt_iface_virt_realm(str, &idx); if (!add) { /* deleting the entry... */ if (!old_ns) { cfd->printf("domain '%s' not mapped!\n", str); cfd->done(-1); } int netns_idx = -1; for (int i=0; i < MAX_REALM_MAPPING_NS; i++) { if (realm_mappings[idx].netns[i] == ns) { netns_idx = i; old_ns = ns; break; } } if (old_ns != ns) { cfd->printf("virtual router namespace mismatch '%s' vs '%s'\n", old_ns->m_name, ns->m_name); cfd->done(-1); } if (realm_mappings[idx].nr_netns > 1) { for (int i=netns_idx; (i+1)done(0); return; } if (NULL != old_ns) { struct realm_mapping *mapping = &realm_mappings[idx]; if (mapping->nr_netns >= MAX_REALM_MAPPING_NS) { cfd->printf("max realm mappings met at %d\n", mapping->nr_netns); cfd->done(-1); return; } cfd->printf("virt realm '%s' domain '%s' index %d %d\n", name, str, idx, mapping->nr_netns); mapping->netns[mapping->nr_netns++] = ns; cfd->done(0); return; } struct realm_mapping *mapping = &realm_mappings[nr_realm_mappings++]; mapping->domain = strdup(str); mapping->netns[mapping->nr_netns++] = ns; cfd->printf("virt realm '%s' domain '%s' index %d\n", name, str, nr_realm_mappings-1); cfd->done(0); } void do_bvirt(ctrlfd_t *cfd, char *str) { char name[32]; char command[32]; int i; if (*str != ' ') return bvirt_usage(cfd); if (!strncmp(str, " list", 5)) return do_bvirt_list(cfd, str + 5); str++; for (i=0; *str && *str != ' ' && i < 31; ) command[i++] = *str++; command[i] = 0; if (*str == ' ') str++; for (i=0; *str && *str != ' ' && i < 31; ) name[i++] = *str++; name[i] = 0; if (!strcmp(command, "add")) return do_bvirt_add(cfd, name, str); if (!strcmp(command, "del")) return do_bvirt_del(cfd, name, str); if (!strcmp(command, "gettid")) return do_bvirt_gettid(cfd, name); if (!strcmp(command, "iface-virt-realm")) return do_bvirt_iface_virt_realm(cfd, name, str, 1); if (!strcmp(command, "del-iface-virt-realm")) return do_bvirt_iface_virt_realm(cfd, name, str, 0); if (!strcmp(command, "system")) return do_bvirt_system(cfd, name, str); bvirt_usage(cfd); } struct bvirt_rpc_socket { int domain; int type; int proto; }; int bvirt_ns_t::rpc_socket_rpc(void *arg1, void *arg2) { struct bvirt_rpc_socket *rpc = (struct bvirt_rpc_socket *)arg1; int ret = socket(rpc->domain, rpc->type, rpc->proto); if (ret < 0) return -errno; return ret; } int bvirt_ns_t::socket_rpc(int domain, int type, int proto) { struct bvirt_rpc_socket rpc; rpc.domain = domain; rpc.type = type; rpc.proto = proto; return sendrpc(&bvirt_ns_t::rpc_socket_rpc, &rpc, NULL, 1); } int netns_socket(bvirt_ns_t *netns, int domain, int type, int proto) { if (netns) { int ret = netns->socket_rpc(domain, type, proto); if (ret < 0) { errno = -ret; return -1; } return ret; } return socket(domain, type, proto); } struct bvirt_rpc_sendto { int sockfd; const void *buf; size_t len; int flags; const struct sockaddr *dst; socklen_t addrlen; }; int bvirt_ns_t::rpc_sendto_rpc(void *arg1, void *arg2) { struct bvirt_rpc_sendto *rpc = (struct bvirt_rpc_sendto *)arg1; return sendto(rpc->sockfd, rpc->buf, rpc->len, rpc->flags, rpc->dst, rpc->addrlen); } int bvirt_ns_t::sendto_rpc(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dst, socklen_t addrlen) { struct bvirt_rpc_sendto rpc; rpc.sockfd = sockfd; rpc.buf = buf; rpc.len = len; rpc.flags = flags; rpc.dst = dst; rpc.addrlen = addrlen; return sendrpc(&bvirt_ns_t::rpc_sendto_rpc, &rpc, NULL, 1); } int bvirt_parse_ns(ctrlfd_t *cfd, char **strp, bvirt_ns_t **netnsp) { char *str = *strp; char *s = strchr(str, '^'); *netnsp = NULL; if (s) { char *ns = str; *s++ = 0; if (!*s) { cfd->printf("missing id after namespace -- '%s'\n", s); cfd->done(-1); return -1; } *strp = s; *netnsp = lookup_bvirt_ns(ns); if (!*netnsp) { cfd->printf("virtual network namespace '%s' not found\n", ns); cfd->done(-1); return -1; } } return 0; } struct bvirt_rpc_generic { int (*fn)(void *arg1, void *arg2); void *arg1; void *arg2; }; int bvirt_ns_t::rpc_generic_rpc(void *arg1, void *arg2) { struct bvirt_rpc_generic *rpc = (struct bvirt_rpc_generic *)arg1; int ret = rpc->fn(rpc->arg1, rpc->arg2); delete rpc; return ret; } int bvirt_ns_t::generic_rpc(int (*fn)(void *arg1, void *arg2), void *arg1, void *arg2, int sync) { struct bvirt_rpc_generic *rpc = new struct bvirt_rpc_generic; if (!rpc) return -ENOMEM; rpc->fn = fn; rpc->arg1 = arg1; rpc->arg2 = arg2; return sendrpc(&bvirt_ns_t::rpc_generic_rpc, rpc, NULL, sync); }