diff options
Diffstat (limited to 'daemon.c')
-rw-r--r-- | daemon.c | 754 |
1 files changed, 754 insertions, 0 deletions
diff --git a/daemon.c b/daemon.c new file mode 100644 index 0000000000..a1ccda30e2 --- /dev/null +++ b/daemon.c @@ -0,0 +1,754 @@ +#include <signal.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/poll.h> +#include <netdb.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <syslog.h> +#include "pkt-line.h" +#include "cache.h" +#include "exec_cmd.h" + +static int log_syslog; +static int verbose; +static int reuseaddr; + +static const char daemon_usage[] = +"git-daemon [--verbose] [--syslog] [--inetd | --port=n] [--export-all]\n" +" [--timeout=n] [--init-timeout=n] [--strict-paths]\n" +" [--base-path=path] [--user-path | --user-path=path]\n" +" [--reuseaddr] [directory...]"; + +/* List of acceptable pathname prefixes */ +static char **ok_paths = NULL; +static int strict_paths = 0; + +/* If this is set, git-daemon-export-ok is not required */ +static int export_all_trees = 0; + +/* Take all paths relative to this one if non-NULL */ +static char *base_path = NULL; + +/* If defined, ~user notation is allowed and the string is inserted + * after ~user/. E.g. a request to git://host/~alice/frotz would + * go to /home/alice/pub_git/frotz with --user-path=pub_git. + */ +static char *user_path = NULL; + +/* Timeout, and initial timeout */ +static unsigned int timeout = 0; +static unsigned int init_timeout = 0; + +static void logreport(int priority, const char *err, va_list params) +{ + /* We should do a single write so that it is atomic and output + * of several processes do not get intermingled. */ + char buf[1024]; + int buflen; + int maxlen, msglen; + + /* sizeof(buf) should be big enough for "[pid] \n" */ + buflen = snprintf(buf, sizeof(buf), "[%ld] ", (long) getpid()); + + maxlen = sizeof(buf) - buflen - 1; /* -1 for our own LF */ + msglen = vsnprintf(buf + buflen, maxlen, err, params); + + if (log_syslog) { + syslog(priority, "%s", buf); + return; + } + + /* maxlen counted our own LF but also counts space given to + * vsnprintf for the terminating NUL. We want to make sure that + * we have space for our own LF and NUL after the "meat" of the + * message, so truncate it at maxlen - 1. + */ + if (msglen > maxlen - 1) + msglen = maxlen - 1; + else if (msglen < 0) + msglen = 0; /* Protect against weird return values. */ + buflen += msglen; + + buf[buflen++] = '\n'; + buf[buflen] = '\0'; + + write(2, buf, buflen); +} + +static void logerror(const char *err, ...) +{ + va_list params; + va_start(params, err); + logreport(LOG_ERR, err, params); + va_end(params); +} + +static void loginfo(const char *err, ...) +{ + va_list params; + if (!verbose) + return; + va_start(params, err); + logreport(LOG_INFO, err, params); + va_end(params); +} + +static int avoid_alias(char *p) +{ + int sl, ndot; + + /* + * This resurrects the belts and suspenders paranoia check by HPA + * done in <435560F7.4080006@zytor.com> thread, now enter_repo() + * does not do getcwd() based path canonicalizations. + * + * sl becomes true immediately after seeing '/' and continues to + * be true as long as dots continue after that without intervening + * non-dot character. + */ + if (!p || (*p != '/' && *p != '~')) + return -1; + sl = 1; ndot = 0; + p++; + + while (1) { + char ch = *p++; + if (sl) { + if (ch == '.') + ndot++; + else if (ch == '/') { + if (ndot < 3) + /* reject //, /./ and /../ */ + return -1; + ndot = 0; + } + else if (ch == 0) { + if (0 < ndot && ndot < 3) + /* reject /.$ and /..$ */ + return -1; + return 0; + } + else + sl = ndot = 0; + } + else if (ch == 0) + return 0; + else if (ch == '/') { + sl = 1; + ndot = 0; + } + } +} + +static char *path_ok(char *dir) +{ + static char rpath[PATH_MAX]; + char *path; + + if (avoid_alias(dir)) { + logerror("'%s': aliased", dir); + return NULL; + } + + if (*dir == '~') { + if (!user_path) { + logerror("'%s': User-path not allowed", dir); + return NULL; + } + if (*user_path) { + /* Got either "~alice" or "~alice/foo"; + * rewrite them to "~alice/%s" or + * "~alice/%s/foo". + */ + int namlen, restlen = strlen(dir); + char *slash = strchr(dir, '/'); + if (!slash) + slash = dir + restlen; + namlen = slash - dir; + restlen -= namlen; + loginfo("userpath <%s>, request <%s>, namlen %d, restlen %d, slash <%s>", user_path, dir, namlen, restlen, slash); + snprintf(rpath, PATH_MAX, "%.*s/%s%.*s", + namlen, dir, user_path, restlen, slash); + dir = rpath; + } + } + else if (base_path) { + if (*dir != '/') { + /* Allow only absolute */ + logerror("'%s': Non-absolute path denied (base-path active)", dir); + return NULL; + } + else { + snprintf(rpath, PATH_MAX, "%s%s", base_path, dir); + dir = rpath; + } + } + + path = enter_repo(dir, strict_paths); + + if (!path) { + logerror("'%s': unable to chdir or not a git archive", dir); + return NULL; + } + + if ( ok_paths && *ok_paths ) { + char **pp; + int pathlen = strlen(path); + + /* The validation is done on the paths after enter_repo + * appends optional {.git,.git/.git} and friends, but + * it does not use getcwd(). So if your /pub is + * a symlink to /mnt/pub, you can whitelist /pub and + * do not have to say /mnt/pub. + * Do not say /pub/. + */ + for ( pp = ok_paths ; *pp ; pp++ ) { + int len = strlen(*pp); + if (len <= pathlen && + !memcmp(*pp, path, len) && + (path[len] == '\0' || + (!strict_paths && path[len] == '/'))) + return path; + } + } + else { + /* be backwards compatible */ + if (!strict_paths) + return path; + } + + logerror("'%s': not in whitelist", path); + return NULL; /* Fallthrough. Deny by default */ +} + +static int upload(char *dir) +{ + /* Timeout as string */ + char timeout_buf[64]; + const char *path; + + loginfo("Request for '%s'", dir); + + if (!(path = path_ok(dir))) + return -1; + + /* + * Security on the cheap. + * + * We want a readable HEAD, usable "objects" directory, and + * a "git-daemon-export-ok" flag that says that the other side + * is ok with us doing this. + * + * path_ok() uses enter_repo() and does whitelist checking. + * We only need to make sure the repository is exported. + */ + + if (!export_all_trees && access("git-daemon-export-ok", F_OK)) { + logerror("'%s': repository not exported.", path); + errno = EACCES; + return -1; + } + + /* + * We'll ignore SIGTERM from now on, we have a + * good client. + */ + signal(SIGTERM, SIG_IGN); + + snprintf(timeout_buf, sizeof timeout_buf, "--timeout=%u", timeout); + + /* git-upload-pack only ever reads stuff, so this is safe */ + execl_git_cmd("upload-pack", "--strict", timeout_buf, ".", NULL); + return -1; +} + +static int execute(void) +{ + static char line[1000]; + int len; + + alarm(init_timeout ? init_timeout : timeout); + len = packet_read_line(0, line, sizeof(line)); + alarm(0); + + if (len && line[len-1] == '\n') + line[--len] = 0; + + if (!strncmp("git-upload-pack ", line, 16)) + return upload(line+16); + + logerror("Protocol error: '%s'", line); + return -1; +} + + +/* + * We count spawned/reaped separately, just to avoid any + * races when updating them from signals. The SIGCHLD handler + * will only update children_reaped, and the fork logic will + * only update children_spawned. + * + * MAX_CHILDREN should be a power-of-two to make the modulus + * operation cheap. It should also be at least twice + * the maximum number of connections we will ever allow. + */ +#define MAX_CHILDREN 128 + +static int max_connections = 25; + +/* These are updated by the signal handler */ +static volatile unsigned int children_reaped = 0; +static pid_t dead_child[MAX_CHILDREN]; + +/* These are updated by the main loop */ +static unsigned int children_spawned = 0; +static unsigned int children_deleted = 0; + +static struct child { + pid_t pid; + int addrlen; + struct sockaddr_storage address; +} live_child[MAX_CHILDREN]; + +static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) +{ + live_child[idx].pid = pid; + live_child[idx].addrlen = addrlen; + memcpy(&live_child[idx].address, addr, addrlen); +} + +/* + * Walk from "deleted" to "spawned", and remove child "pid". + * + * We move everything up by one, since the new "deleted" will + * be one higher. + */ +static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) +{ + struct child n; + + deleted %= MAX_CHILDREN; + spawned %= MAX_CHILDREN; + if (live_child[deleted].pid == pid) { + live_child[deleted].pid = -1; + return; + } + n = live_child[deleted]; + for (;;) { + struct child m; + deleted = (deleted + 1) % MAX_CHILDREN; + if (deleted == spawned) + die("could not find dead child %d\n", pid); + m = live_child[deleted]; + live_child[deleted] = n; + if (m.pid == pid) + return; + n = m; + } +} + +/* + * This gets called if the number of connections grows + * past "max_connections". + * + * We _should_ start off by searching for connections + * from the same IP, and if there is some address wth + * multiple connections, we should kill that first. + * + * As it is, we just "randomly" kill 25% of the connections, + * and our pseudo-random generator sucks too. I have no + * shame. + * + * Really, this is just a place-holder for a _real_ algorithm. + */ +static void kill_some_children(int signo, unsigned start, unsigned stop) +{ + start %= MAX_CHILDREN; + stop %= MAX_CHILDREN; + while (start != stop) { + if (!(start & 3)) + kill(live_child[start].pid, signo); + start = (start + 1) % MAX_CHILDREN; + } +} + +static void check_max_connections(void) +{ + for (;;) { + int active; + unsigned spawned, reaped, deleted; + + spawned = children_spawned; + reaped = children_reaped; + deleted = children_deleted; + + while (deleted < reaped) { + pid_t pid = dead_child[deleted % MAX_CHILDREN]; + remove_child(pid, deleted, spawned); + deleted++; + } + children_deleted = deleted; + + active = spawned - deleted; + if (active <= max_connections) + break; + + /* Kill some unstarted connections with SIGTERM */ + kill_some_children(SIGTERM, deleted, spawned); + if (active <= max_connections << 1) + break; + + /* If the SIGTERM thing isn't helping use SIGKILL */ + kill_some_children(SIGKILL, deleted, spawned); + sleep(1); + } +} + +static void handle(int incoming, struct sockaddr *addr, int addrlen) +{ + pid_t pid = fork(); + char addrbuf[256] = ""; + int port = -1; + + if (pid) { + unsigned idx; + + close(incoming); + if (pid < 0) + return; + + idx = children_spawned % MAX_CHILDREN; + children_spawned++; + add_child(idx, pid, addr, addrlen); + + check_max_connections(); + return; + } + + dup2(incoming, 0); + dup2(incoming, 1); + close(incoming); + + if (addr->sa_family == AF_INET) { + struct sockaddr_in *sin_addr = (void *) addr; + inet_ntop(AF_INET, &sin_addr->sin_addr, addrbuf, sizeof(addrbuf)); + port = sin_addr->sin_port; + +#ifndef NO_IPV6 + } else if (addr->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6_addr = (void *) addr; + + char *buf = addrbuf; + *buf++ = '['; *buf = '\0'; /* stpcpy() is cool */ + inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(addrbuf) - 1); + strcat(buf, "]"); + + port = sin6_addr->sin6_port; +#endif + } + loginfo("Connection from %s:%d", addrbuf, port); + + exit(execute()); +} + +static void child_handler(int signo) +{ + for (;;) { + int status; + pid_t pid = waitpid(-1, &status, WNOHANG); + + if (pid > 0) { + unsigned reaped = children_reaped; + dead_child[reaped % MAX_CHILDREN] = pid; + children_reaped = reaped + 1; + /* XXX: Custom logging, since we don't wanna getpid() */ + if (verbose) { + char *dead = ""; + if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) + dead = " (with error)"; + if (log_syslog) + syslog(LOG_INFO, "[%d] Disconnected%s", pid, dead); + else + fprintf(stderr, "[%d] Disconnected%s\n", pid, dead); + } + continue; + } + break; + } +} + +static int set_reuse_addr(int sockfd) +{ + int on = 1; + + if (!reuseaddr) + return 0; + return setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, + &on, sizeof(on)); +} + +#ifndef NO_IPV6 + +static int socksetup(int port, int **socklist_p) +{ + int socknum = 0, *socklist = NULL; + int maxfd = -1; + char pbuf[NI_MAXSERV]; + + struct addrinfo hints, *ai0, *ai; + int gai; + + sprintf(pbuf, "%d", port); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_flags = AI_PASSIVE; + + gai = getaddrinfo(NULL, pbuf, &hints, &ai0); + if (gai) + die("getaddrinfo() failed: %s\n", gai_strerror(gai)); + + for (ai = ai0; ai; ai = ai->ai_next) { + int sockfd; + int *newlist; + + sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (sockfd < 0) + continue; + if (sockfd >= FD_SETSIZE) { + error("too large socket descriptor."); + close(sockfd); + continue; + } + +#ifdef IPV6_V6ONLY + if (ai->ai_family == AF_INET6) { + int on = 1; + setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, + &on, sizeof(on)); + /* Note: error is not fatal */ + } +#endif + + if (set_reuse_addr(sockfd)) { + close(sockfd); + return 0; /* not fatal */ + } + + if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) { + close(sockfd); + continue; /* not fatal */ + } + if (listen(sockfd, 5) < 0) { + close(sockfd); + continue; /* not fatal */ + } + + newlist = realloc(socklist, sizeof(int) * (socknum + 1)); + if (!newlist) + die("memory allocation failed: %s", strerror(errno)); + + socklist = newlist; + socklist[socknum++] = sockfd; + + if (maxfd < sockfd) + maxfd = sockfd; + } + + freeaddrinfo(ai0); + + *socklist_p = socklist; + return socknum; +} + +#else /* NO_IPV6 */ + +static int socksetup(int port, int **socklist_p) +{ + struct sockaddr_in sin; + int sockfd; + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) + return 0; + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(port); + + if (set_reuse_addr(sockfd)) { + close(sockfd); + return 0; + } + + if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) { + close(sockfd); + return 0; + } + + if (listen(sockfd, 5) < 0) { + close(sockfd); + return 0; + } + + *socklist_p = xmalloc(sizeof(int)); + **socklist_p = sockfd; + return 1; +} + +#endif + +static int service_loop(int socknum, int *socklist) +{ + struct pollfd *pfd; + int i; + + pfd = xcalloc(socknum, sizeof(struct pollfd)); + + for (i = 0; i < socknum; i++) { + pfd[i].fd = socklist[i]; + pfd[i].events = POLLIN; + } + + signal(SIGCHLD, child_handler); + + for (;;) { + int i; + + if (poll(pfd, socknum, -1) < 0) { + if (errno != EINTR) { + error("poll failed, resuming: %s", + strerror(errno)); + sleep(1); + } + continue; + } + + for (i = 0; i < socknum; i++) { + if (pfd[i].revents & POLLIN) { + struct sockaddr_storage ss; + unsigned int sslen = sizeof(ss); + int incoming = accept(pfd[i].fd, (struct sockaddr *)&ss, &sslen); + if (incoming < 0) { + switch (errno) { + case EAGAIN: + case EINTR: + case ECONNABORTED: + continue; + default: + die("accept returned %s", strerror(errno)); + } + } + handle(incoming, (struct sockaddr *)&ss, sslen); + } + } + } +} + +static int serve(int port) +{ + int socknum, *socklist; + + socknum = socksetup(port, &socklist); + if (socknum == 0) + die("unable to allocate any listen sockets on port %u", port); + + return service_loop(socknum, socklist); +} + +int main(int argc, char **argv) +{ + int port = DEFAULT_GIT_PORT; + int inetd_mode = 0; + int i; + + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + + if (!strncmp(arg, "--port=", 7)) { + char *end; + unsigned long n; + n = strtoul(arg+7, &end, 0); + if (arg[7] && !*end) { + port = n; + continue; + } + } + if (!strcmp(arg, "--inetd")) { + inetd_mode = 1; + log_syslog = 1; + continue; + } + if (!strcmp(arg, "--verbose")) { + verbose = 1; + continue; + } + if (!strcmp(arg, "--syslog")) { + log_syslog = 1; + continue; + } + if (!strcmp(arg, "--export-all")) { + export_all_trees = 1; + continue; + } + if (!strncmp(arg, "--timeout=", 10)) { + timeout = atoi(arg+10); + continue; + } + if (!strncmp(arg, "--init-timeout=", 15)) { + init_timeout = atoi(arg+15); + continue; + } + if (!strcmp(arg, "--strict-paths")) { + strict_paths = 1; + continue; + } + if (!strncmp(arg, "--base-path=", 12)) { + base_path = arg+12; + continue; + } + if (!strcmp(arg, "--reuseaddr")) { + reuseaddr = 1; + continue; + } + if (!strcmp(arg, "--user-path")) { + user_path = ""; + continue; + } + if (!strncmp(arg, "--user-path=", 12)) { + user_path = arg + 12; + continue; + } + if (!strcmp(arg, "--")) { + ok_paths = &argv[i+1]; + break; + } else if (arg[0] != '-') { + ok_paths = &argv[i]; + break; + } + + usage(daemon_usage); + } + + if (log_syslog) + openlog("git-daemon", 0, LOG_DAEMON); + + if (strict_paths && (!ok_paths || !*ok_paths)) { + if (!inetd_mode) + die("git-daemon: option --strict-paths requires a whitelist"); + + logerror("option --strict-paths requires a whitelist"); + exit (1); + } + + if (inetd_mode) { + fclose(stderr); //FIXME: workaround + return execute(); + } + + return serve(port); +} |