/* Steve, the jobserver
 * (c) 2025 Michał Górny
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * Inspired by CUSE example, nixos-jobserver (draft) and guildmaster:
 * https://github.com/libfuse/libfuse/blob/f58d4c5b0d56116d8870753f6b9d1620ee082709/example/cuse.c
 * https://github.com/RaitoBezarius/nixpkgs/blob/e97220ecf1e8887b949e4e16547bf0334826d076/pkgs/by-name/ni/nixos-jobserver/nixos-jobserver.cpp#L213
 * https://codeberg.org/amonakov/guildmaster/
 */

#define FUSE_USE_VERSION 31

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cerrno>
#include <climits>
#include <cmath>
#include <csignal>
#include <deque>
#include <functional>
#include <memory>
#include <print>
#include <string>
#include <unordered_map>
#include <variant>

#include <getopt.h>
#include <grp.h>
#include <pwd.h>
#include <sys/poll.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <event2/event.h>

#include <cuse_lowlevel.h>
#include <fuse.h>
#include <fuse_opt.h>

#include "steve.h"
#include "util.hxx"

/* A client waiting for a job token.
 *
 * The waiter holds either a pending read request (fuse_req_t) or a poll
 * handle (fuse_pollhandle *) that is to be notified when a token can be
 * handed out.  A non-null poll handle is owned by the waiter and destroyed
 * along with it; a read request is never replied to or released here.
 *
 * The type is move-only.  A moved-from waiter holds a null poll handle.
 */
struct steve_waiter {
	std::variant<fuse_req_t, fuse_pollhandle *> handle;
	uint64_t pid;

	steve_waiter(fuse_req_t new_req, uint64_t new_pid)
		: handle(new_req), pid(new_pid) {}
	steve_waiter(fuse_pollhandle *new_poll_handle, uint64_t new_pid)
		: handle(new_poll_handle), pid(new_pid) {}

	steve_waiter(const steve_waiter &) = delete;
	steve_waiter& operator=(const steve_waiter &) = delete;

	steve_waiter(steve_waiter &&other) noexcept
		: handle(other.handle), pid(other.pid)
	{
		other.disarm();
	}
	steve_waiter& operator=(steve_waiter &&other) noexcept {
		/* self-assignment must not drop the handle we hold */
		if (this != &other) {
			/* the handle being overwritten is ours to destroy; this
			 * happens e.g. when an element is erased from the middle
			 * of the waiter queue and its neighbours are shifted */
			release();
			handle = other.handle;
			pid = other.pid;
			other.disarm();
		}
		return *this;
	}

	~steve_waiter() {
		release();
	}

private:
	/* Give up ownership without destroying anything. */
	void disarm() noexcept {
		handle = static_cast<fuse_pollhandle *>(nullptr);
	}

	/* Destroy the poll handle, if one is held. */
	void release() noexcept {
		if (fuse_pollhandle **poll_handle = std::get_if<fuse_pollhandle *>(&handle)) {
			if (*poll_handle)
				fuse_pollhandle_destroy(*poll_handle);
		}
	}
};

/* Per-client bookkeeping, keyed by PID in steve_state::processes. */
struct steve_process {
	/* pidfd used to detect the process exiting; -1 if none */
	int pid_fd{-1};
	/* tokens currently held; signed, since it goes negative when a process
	 * returns a token before acquiring one */
	ssize_t tokens_held{0};
	/* whether a token was set aside for this process after a poll */
	bool token_reserved{false};
	/* event watching pid_fd */
	std::unique_ptr<struct event, std::function<void(struct event*)>> event;

	~steve_process() {
		/* free the event first, so that it is removed from the event
		 * base while the descriptor it watches is still open */
		event.reset();
		if (pid_fd != -1)
			close(pid_fd);
	}
};

/* Global jobserver state, shared by all request and event handlers.
 *
 * Every member has a default initializer, so a default-constructed state
 * is fully defined.
 */
struct steve_state {
	/* verbose logging enabled */
	bool verbose{false};
	/* total number of job tokens */
	uint64_t jobs{0};
	/* number of jobs served even when the load limit is exceeded */
	uint64_t min_jobs{0};
	double max_load_avg{-1};  /* < 0 implies no load average */
	/* most recent load value obtained by steve_get_load() */
	double load_avg{0};
	/* tokens currently free; signed, since lowering the job count
	 * at runtime can take it below zero */
	int64_t tokens{0};
	/* clients waiting for a token, in order of arrival */
	std::deque<steve_waiter> waiters;
	/* known client processes, keyed by PID */
	std::unordered_map<uint64_t, steve_process> processes;
	/* libevent base; not owned */
	struct event_base *evb{nullptr};
	/* /proc/loadavg descriptor: -2 = not opened yet, -1 = unavailable */
	int loadavg_fd{-2};
	/* how long to wait before checking the load again */
	struct timeval recheck_timeout{0, 500000};
	/* whether the recheck timer is currently pending */
	bool recheck_triggered{false};
	std::unique_ptr<struct event, std::function<void(struct event*)>> recheck_event{nullptr, event_free};

	/* FUSE session serving the device; not owned */
	struct fuse_session *session{nullptr};
	/* to workaround lack of fuse_buf_free(), keep a global buffer */
	/* https://github.com/libfuse/libfuse/issues/1373 */
	struct fuse_buf buf{};
};

/* Result of checking whether a job token can be handed out right now. */
enum class steve_token_availability {
	/* a token can be given */
	available,
	/* no free tokens are left */
	no_tokens,
	/* tokens are free, but the load limit is exceeded */
	load_exceeded,
};

/* Refresh state->load_avg.
 *
 * The preferred source is the fourth field of /proc/loadavg, i.e. the number
 * before the '/' (minus one, to discount this process).  The file is opened
 * on first use and kept open.  If it cannot be opened, read or parsed, it is
 * closed for good and getloadavg() is used instead, from then on.
 * If getloadavg() fails as well, load_avg is set to -0.0.
 */
static void steve_get_load(steve_state *state)
{
	if (state->loadavg_fd == -2) {
		state->loadavg_fd = open("/proc/loadavg", O_RDONLY);
		if (state->loadavg_fd == -1)
			perror("Unable to open /proc/loadavg, falling back to getloadavg()");
	}

	if (state->loadavg_fd != -1) {
		char buf[64];
		ssize_t rd = pread(state->loadavg_fd, buf, sizeof(buf) - 1, 0);

		if (rd >= 0) {
			buf[rd] = 0;

			/* skip the first three space-separated fields */
			char *begin = buf;
			for (int field = 0; *begin && field != 3; ++begin) {
				if (*begin == ' ')
					++field;
			}
			/* strchr() yields nullptr when there is no '/', so the
			 * pointer itself has to be tested before it is used */
			char *end = *begin ? strchr(begin, '/') : nullptr;

			if (end) {
				*end = 0;
				long load;
				/* Decrease by one to account for our process. */
				if (arg_to_long(begin, &load)) {
					state->load_avg = load - 1;
					return;
				}
				/* put the separator back, so that the message
				 * below shows the complete line */
				*end = '/';
			}

			std::print(stderr, "Parsing /proc/loadavg failed, value = {}\n", buf);
		} else
			perror("Reading /proc/loadavg failed, falling back to getloadavg()");

		close(state->loadavg_fd);
		state->loadavg_fd = -1;
	}

	if (getloadavg(&state->load_avg, 1) == -1) {
		static bool warned = false;
		if (!warned) {
			perror("getloadavg() failed, will ignore (further warnings will be suppressed)");
			warned = true;
		}
		/* to make it clear it failed */
		state->load_avg = -0.0;
	}
}

/* Decide whether the process with the given PID may receive a token now.
 *
 * Side effects: creates the process entry if it does not exist yet, may
 * refresh the load value and may arm the load recheck timer.
 */
static steve_token_availability steve_can_give_token(steve_state *state, uint64_t pid)
{
	using availability = steve_token_availability;

	/* a reserved token is handed out right away, whatever the load is now */
	if (state->processes[pid].token_reserved)
		return availability::available;

	/* throttled earlier due to load: stay so until the timer fires */
	if (state->recheck_triggered) {
		assert(state->tokens > 0);
		return availability::load_exceeded;
	}

	if (state->tokens <= 0)
		return availability::no_tokens;

	/* no load limit configured */
	if (!(state->max_load_avg > 0))
		return availability::available;

	/* the load is not consulted while this holds (min_jobs guarantee) */
	if (state->jobs < state->min_jobs + state->tokens)
		return availability::available;

	steve_get_load(state);
	if (!(state->load_avg > state->max_load_avg))
		return availability::available;

	/* over the limit: schedule a recheck, there cannot be one pending */
	assert(!state->recheck_triggered);
	const bool timer_failed =
		evtimer_add(state->recheck_event.get(), &state->recheck_timeout) == -1;
	if (timer_failed)
		std::print(stderr, "failed to enable recheck timer\n");
	else
		state->recheck_triggered = true;

	return availability::load_exceeded;
}

/* Hand one token to the process and answer its read request with "+".
 *
 * If a token was reserved for the process, that one is used and the free
 * token count is left alone; otherwise a free token is taken.
 */
static void steve_give_token(steve_state *state, fuse_req_t req, uint64_t pid)
{
	steve_process &proc = state->processes[pid];
	const bool was_reserved = proc.token_reserved;

	if (was_reserved)
		proc.token_reserved = false;
	else
		state->tokens--;
	proc.tokens_held++;

	if (state->verbose) {
		if (was_reserved) {
			std::print(stderr, "Giving reserved token to PID {}, {} left, {} tokens held by process\n",
					pid, state->tokens, proc.tokens_held);
		} else if (state->max_load_avg > 0) {
			std::print(stderr, "Giving job token to PID {}, {} left, {} tokens held by process, token reserved: {}, load average = {:.3} (limit: {})\n",
					pid, state->tokens, proc.tokens_held, proc.token_reserved, state->load_avg, state->max_load_avg);
		} else {
			std::print(stderr, "Giving job token to PID {}, {} left, {} tokens held by process, token reserved: {}\n",
					pid, state->tokens, proc.tokens_held, proc.token_reserved);
		}
	}

	fuse_reply_buf(req, "+", 1);
}

/* Set a free token aside for the process, unless it already has one
 * reserved.  The token leaves the free pool immediately.
 */
static void steve_reserve_token(steve_state *state, uint64_t pid)
{
	steve_process &proc = state->processes[pid];

	if (!proc.token_reserved) {
		state->tokens--;
		proc.token_reserved = true;

		if (state->verbose) {
			const bool load_limited = state->max_load_avg > 0;
			if (!load_limited)
				std::print(stderr, "Reserving job token for PID {}, {} left, {} tokens held by process\n",
						pid, state->tokens, proc.tokens_held);
			else
				std::print(stderr, "Reserving job token for PID {}, {} left, {} tokens held by process, load average = {:.3} (limit: {})\n",
						pid, state->tokens, proc.tokens_held, state->load_avg, state->max_load_avg);
		}

		/* TODO: we need to handle expiring reservations if client doesn't read */
	}
}

/* Serve waiters from the head of the queue for as long as tokens can be
 * given.  Read waiters get a token, poll waiters get a token reserved and
 * a poll notification.  Stops at the first waiter that cannot be served.
 */
static void steve_wake_waiters(steve_state *state)
{
	std::deque<steve_waiter> &queue = state->waiters;

	while (!queue.empty()) {
		steve_waiter &head = queue.front();

		if (steve_can_give_token(state, head.pid) != steve_token_availability::available)
			return;

		if (std::holds_alternative<fuse_req_t>(head.handle)) {
			/* read request */
			steve_give_token(state, std::get<fuse_req_t>(head.handle), head.pid);
		} else if (std::holds_alternative<fuse_pollhandle *>(head.handle)) {
			/* poll request */
			steve_reserve_token(state, head.pid);
			if (state->verbose)
				std::print(stderr, "Notifying PID {} about POLLIN\n", head.pid);
			fuse_lowlevel_notify_poll(std::get<fuse_pollhandle *>(head.handle));
		} else {
			assert(0 && "invalid waiter");
		}

		/* done with this one; destroying it releases its poll handle */
		queue.pop_front();
	}
}

/* libevent callback, invoked when a client's pidfd becomes readable.
 *
 * Drops all waiters of that process, returns its held and reserved tokens
 * to the pool, forgets the process and then serves other waiters.
 */
static void steve_handle_pidfd(evutil_socket_t pid_fd, short, void *userdata) {
	steve_state *state = static_cast<steve_state *>(userdata);

	/* find the process owning this pidfd */
	auto proc_it = state->processes.begin();
	while (proc_it != state->processes.end() && proc_it->second.pid_fd != pid_fd)
		++proc_it;
	if (proc_it == state->processes.end()) {
		assert(0 && "pidfd triggered for unknown process");
		return;
	}

	const uint64_t pid = proc_it->first;
	steve_process &proc = proc_it->second;

	/* remove all waiters */
	auto wit = state->waiters.begin();
	while (wit != state->waiters.end()) {
		if (wit->pid == pid) {
			if (std::holds_alternative<fuse_req_t>(wit->handle)) {
				/* can we even have read waiters at this point? */
				fuse_reply_err(std::get<fuse_req_t>(wit->handle), EPIPE);
				if (state->verbose)
					std::print(stderr, "Cleaning up read waiter for PID {}\n", wit->pid);
			} else if (std::holds_alternative<fuse_pollhandle *>(wit->handle)) {
				/* notify the poller, just in case */
				fuse_lowlevel_notify_poll(std::get<fuse_pollhandle *>(wit->handle));
				if (state->verbose)
					std::print(stderr, "Cleaning up poll notification for PID {}\n", wit->pid);
			} else {
				assert(0 && "invalid waiter");
			}

			wit = state->waiters.erase(wit);
		} else {
			++wit;
		}
	}

	/* return all tokens held */
	state->tokens += proc.tokens_held;
	if (proc.token_reserved)
		++state->tokens;
	if (state->verbose || proc.tokens_held > 0) {
		std::print(stderr, "Process {} exited while holding {} tokens, token reserved: {}, "
				"{} tokens available after returning them\n",
				pid, proc.tokens_held, proc.token_reserved, state->tokens);
	}

	/* remove the process; proc must not be used past this point */
	state->processes.erase(proc_it);
	/* if we have new tokens, wake the waiters */
	steve_wake_waiters(state);
	/* make sure the process was removed */
	assert(state->processes.find(pid) == state->processes.end());
}

/* CUSE init handler: make all tokens available and print a startup banner
 * describing the active configuration.
 */
static void steve_init(void *userdata, struct fuse_conn_info *)
{
	steve_state *state = static_cast<steve_state *>(userdata);
	const bool load_limited = state->max_load_avg > 0;
	const bool has_min_jobs = state->min_jobs > 0;

	/* all tokens are free at start */
	state->tokens = state->jobs;

	std::print(stderr, "steve running on /dev/steve for {} jobs\n", state->jobs);

	if (load_limited) {
		const struct timeval &recheck = state->recheck_timeout;

		std::print(stderr, "  tokens will be served with load average < {:.3}\n", state->max_load_avg);
		std::print(stderr, "  with a recheck timeout of {} s {} us\n",
				recheck.tv_sec, recheck.tv_usec);
	}

	if (has_min_jobs) {
		std::print(stderr, "  at least {} jobs will be always available\n", state->min_jobs);
	}
}

/* CUSE destroy handler: drop all waiters and processes, and close the
 * /proc/loadavg descriptor if one is open.
 */
static void steve_destroy(void *userdata)
{
	steve_state *state = static_cast<steve_state *>(userdata);

	state->waiters.clear();
	state->processes.clear();

	/* negative values mean there is no descriptor to close */
	if (state->loadavg_fd < 0)
		return;
	close(state->loadavg_fd);
}

/* CUSE open handler.
 *
 * Stores the PID of the opening process in fi->fh for use by later
 * requests.  If the process is not known yet, a pidfd is opened for it and
 * registered with the event loop, so that its exit can be detected.
 * The open is rejected with EIO if that monitoring cannot be set up.
 */
static void steve_open(fuse_req_t req, struct fuse_file_info *fi)
{
	const struct fuse_ctx *context = fuse_req_ctx(req);
	steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));

	/* pid is not available in release, so store it here */
	static_assert(sizeof(fi->fh) >= sizeof(context->pid));
	fi->fh = context->pid;

	if (state->verbose) {
		char cmdline[128] = {};

		std::string path = std::format("/proc/{}/cmdline", fi->fh);
		FILE *cmdline_file = fopen(path.c_str(), "r");
		if (cmdline_file) {
			size_t rd = fread(cmdline, 1, sizeof(cmdline) - 1, cmdline_file);
			if (rd > 0) {
				/* replace all NULs with spaces, except for the final one */
				for (size_t i = 0; i < rd - 1; ++i) {
					if (cmdline[i] == 0)
						cmdline[i] = ' ';
				}
				/* ensure a NUL, in case it was truncated */
				cmdline[rd] = 0;
			}
			fclose(cmdline_file);
		}

		if (cmdline[0]) {
			std::print(stderr, "Device open by PID {} ({})\n", fi->fh, cmdline);
		} else
			std::print(stderr, "Device open by PID {} (process name unknown)\n", fi->fh);
	}

	auto known = state->processes.find(fi->fh);
	if (known != state->processes.end()) {
		/* already monitored, nothing to set up */
		assert(known->second.pid_fd != -1);
		assert(known->second.event);
	} else {
		int pid_fd = syscall(SYS_pidfd_open, context->pid, 0);
		if (pid_fd == -1) {
			perror("unable to open pidfd, rejecting to open");
			fuse_reply_err(req, EIO);
			return;
		}

		std::unique_ptr<struct event, std::function<void(struct event*)>>
			pidfd_event{event_new(state->evb, pid_fd, EV_READ|EV_PERSIST, steve_handle_pidfd, state), event_free};
		if (!pidfd_event) {
			std::print(stderr, "unable to allocate event for pidfd\n");
			close(pid_fd);
			fuse_reply_err(req, EIO);
			return;
		}
		if (event_add(pidfd_event.get(), nullptr) == -1) {
			std::print(stderr, "failed to enable pidfd handler\n");
			close(pid_fd);
			fuse_reply_err(req, EIO);
			return;
		}

		/* the entry is created only once everything has succeeded */
		steve_process &proc = state->processes[fi->fh];
		proc.pid_fd = pid_fd;
		proc.event = std::move(pidfd_event);
	}

	fuse_reply_open(req, fi);
}

/* CUSE release handler: log the close and acknowledge it.  No tokens are
 * returned here.
 */
static void steve_release(fuse_req_t req, struct fuse_file_info *fi)
{
	const steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));

	if (state->verbose) {
		/* fi->fh carries the PID stored by the open handler */
		std::print(stderr, "Device closed by PID {}\n", fi->fh);
	}

	fuse_reply_err(req, 0);
}

/* Interrupt callback for a pending read: fail the request with EINTR and
 * remove the matching read waiter from the queue, if there is one.
 */
static void steve_interrupt(fuse_req_t req, void *userdata)
{
	steve_state *state = static_cast<steve_state *>(userdata);

	fuse_reply_err(req, EINTR);

	auto it = state->waiters.begin();
	while (it != state->waiters.end()) {
		/* only read waiters carry a request; poll waiters never match */
		const fuse_req_t *pending = std::get_if<fuse_req_t>(&it->handle);
		if (pending && *pending == req) {
			if (state->verbose)
				std::print(stderr, "Passed EINTR to PID {}\n", it->pid);
			state->waiters.erase(it);
			return;
		}
		++it;
	}
}

/* CUSE read handler: give the caller one token.
 *
 * Replies EIO for a non-zero offset and with an empty buffer for a
 * zero-sized read.  If no token can be given, a non-blocking reader gets
 * EAGAIN, and any other reader is queued until a token becomes available.
 */
static void steve_read(
	fuse_req_t req, size_t size, off_t off, struct fuse_file_info *fi)
{
	steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));

	if (off != 0) {
		fuse_reply_err(req, EIO);
		return;
	}
	if (size == 0) {
		fuse_reply_buf(req, "", 0);
		return;
	}

	/* no need to support reading more than one token at a time */
	const steve_token_availability token_avail = steve_can_give_token(state, fi->fh);
	const bool can_give = token_avail == steve_token_availability::available;
	if (can_give) {
		steve_give_token(state, req, fi->fh);
		return;
	}

	if (fi->flags & O_NONBLOCK) {
		fuse_reply_err(req, EAGAIN);
		return;
	}

	/* park the request; it is answered once a token is free */
	state->waiters.emplace_back(req, fi->fh);
	if (state->verbose) {
		const bool throttled = token_avail == steve_token_availability::load_exceeded;
		if (!throttled) {
			std::print(stderr, "No free job token for PID {}, waiting, {} tokens held by process\n",
					fi->fh, state->processes[fi->fh].tokens_held);
		} else {
			std::print(stderr, "Load exceeded while PID {} requested token, waiting, {} tokens free, "
					"{} tokens held by process, load average {:.3} >= {}\n",
					fi->fh, state->tokens, state->processes[fi->fh].tokens_held,
					state->load_avg, state->max_load_avg);
		}
	}
	fuse_req_interrupt_func(req, steve_interrupt, state);
}

/* CUSE write handler: every byte written returns one token.
 *
 * The data itself is ignored.  Returning more tokens than the process
 * holds is capped to the number held, except for a single token returned
 * while holding none, which is accepted with a warning.  Replies ENOSPC
 * when nothing is left to return, then wakes waiters on success.
 */
static void steve_write(
	fuse_req_t req, const char *, size_t size, off_t off,
	struct fuse_file_info *fi)
{
	steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));

	if (off != 0) {
		fuse_reply_err(req, EIO);
		return;
	}
	if (size > SSIZE_MAX) {
		std::print(stderr, "Warning: process {} tried to return more than SSIZE_MAX tokens\n",
				fi->fh);
		fuse_reply_err(req, EFBIG);
		return;
	}

	steve_process &proc = state->processes[fi->fh];

	/* workaround for https://github.com/medek/nasm-rs/issues/44 */
	const bool pre_release = proc.tokens_held == 0 && size == 1;
	if (pre_release) {
		std::print(stderr, "Warning: process {} pre-released an unacquired token, please report a bug upstream\n",
				fi->fh);
	} else if (proc.tokens_held < static_cast<ssize_t>(size)) {
		std::print(stderr, "Warning: process {} tried to return {} tokens while holding only {} tokens, capping\n",
				fi->fh, size, proc.tokens_held);
		/* a negative balance means there is nothing to give back */
		size = proc.tokens_held < 0 ? 0 : proc.tokens_held;
	}

	if (size == 0) {
		fuse_reply_err(req, ENOSPC);
		return;
	}

	state->tokens += size;
	proc.tokens_held -= size;
	if (state->verbose) {
		std::print(stderr, "PID {} returned {} tokens, {} available now, {} tokens held by process, token reserved: {}\n",
				fi->fh, size, state->tokens, proc.tokens_held, proc.token_reserved);
	}
	fuse_reply_write(req, size);

	/* Since we have jobs now, see if anyone's waiting */
	steve_wake_waiters(state);
}

/* CUSE poll handler.
 *
 * POLLOUT is always reported as ready.  POLLIN is reported only if a token
 * could be given right now; otherwise the poll handle is queued as a waiter
 * and notified once a token has been reserved for the process.
 *
 * The handler owns ph: it is either handed over to the waiter queue, or
 * destroyed here when it is not needed.
 */
static void steve_poll(
	fuse_req_t req, struct fuse_file_info *fi, struct fuse_pollhandle *ph)
{
	steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));
	int events = fi->poll_events & (POLLIN | POLLOUT);

	/* POLLOUT is always possible, POLLIN only if we have any tokens */
	steve_token_availability token_avail = steve_can_give_token(state, fi->fh);
	if (token_avail != steve_token_availability::available) {
		/* the waiter takes ownership of the handle */
		state->waiters.emplace_back(ph, fi->fh);
		events &= ~POLLIN;
	} else if (ph) {
		/* no notification will ever be sent for this handle, and nobody
		 * else is going to free it */
		fuse_pollhandle_destroy(ph);
	}

	if (state->verbose) {
		if (token_avail == steve_token_availability::load_exceeded) {
			assert(state->max_load_avg > 0);
			/* capped by load average */
			std::print(stderr, "Load exceeded while PID {} requested token, waiting, {} tokens free, "
					"{} tokens held by process, load average {:.3} >= {}\n",
					fi->fh, state->tokens, state->processes[fi->fh].tokens_held,
					state->load_avg, state->max_load_avg);
		} else
			std::print(stderr, "PID {} requested poll, {} tokens available, {} tokens held by process\n",
					fi->fh, state->tokens, state->processes[fi->fh].tokens_held);
	}

	fuse_reply_poll(req, events);
}

/* Convert a timeout in (fractional) seconds into a timeval.
 *
 * The fraction is rounded to the nearest microsecond.  If that rounds up to
 * a whole second, it is carried into tv_sec, so that tv_usec always stays
 * below 1000000.  The timeout is expected to be non-negative.
 */
static void steve_timeout_to_timeval(struct timeval *out, double timeout) {
	double seconds = std::trunc(timeout);
	double micros = std::round((timeout - seconds) * 1000000);

	/* e.g. 0.9999996 s rounds to 1000000 us, which is not a valid tv_usec */
	if (micros >= 1000000) {
		seconds += 1;
		micros -= 1000000;
	}

	out->tv_sec = static_cast<decltype(out->tv_sec)>(seconds);
	out->tv_usec = static_cast<decltype(out->tv_usec)>(micros);
}

/* CUSE ioctl handler: query or change the jobserver configuration.
 *
 * GET requests reply with an int64_t (tokens, jobs, min-jobs) or a double
 * (load average limit, recheck timeout in seconds).  SET requests take an
 * argument of the same type, which is validated before use:
 *  - integers must be in the range [0, INT_MAX),
 *  - the load average must be >= 1,
 *  - the recheck timeout must be within [1 us, INT_MAX s],
 *  - NaN is rejected for both floating-point values.
 *
 * Replies ENOSYS to compat ioctls, EINVAL on a missing, short or invalid
 * argument, and ENOTTY for requests that are not known.
 */
static void steve_ioctl(
	fuse_req_t req, int cmd, void *, fuse_file_info *fi,
	unsigned flags, const void *in_buf, size_t in_bufsz, size_t)
{
	steve_state *state = static_cast<steve_state *>(fuse_req_userdata(req));
	/* FUSE uses the wrong type, sigh */
	unsigned ioctl_num = cmd;

	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	if (state->verbose)
		std::print(stderr, "PID {} requested ioctl 0x{:08x}\n",
				fi->fh, ioctl_num);

	int64_t val = 0;
	double dval = 0;
	if (STEVE_IOC_IS_SET(ioctl_num)) {
		/* The argument comes from the client, so make sure it is there
		 * and large enough before reading it.  memcpy() is used, as
		 * nothing here guarantees the alignment of the input buffer. */
		switch (ioctl_num) {
			case STEVE_IOC_SET_LOAD_AVG:
			case STEVE_IOC_SET_LOAD_RECHECK_TIMEOUT:
				if (!in_buf || in_bufsz < sizeof(dval)) {
					fuse_reply_err(req, EINVAL);
					return;
				}
				memcpy(&dval, in_buf, sizeof(dval));
				break;
			case STEVE_IOC_SET_JOBS:
			case STEVE_IOC_SET_MIN_JOBS:
				if (!in_buf || in_bufsz < sizeof(val)) {
					fuse_reply_err(req, EINVAL);
					return;
				}
				memcpy(&val, in_buf, sizeof(val));
				if (val < 0 || val >= INT_MAX) {
					fuse_reply_err(req, EINVAL);
					return;
				}
				break;
			default:
				/* not a request we know, do not touch its argument */
				fuse_reply_err(req, ENOTTY);
				return;
		}
	}

	switch (ioctl_num) {
		case STEVE_IOC_GET_TOKENS:
			val = state->tokens;
			fuse_reply_ioctl(req, 0, &val, sizeof(val));
			break;
		case STEVE_IOC_GET_JOBS:
			val = state->jobs;
			fuse_reply_ioctl(req, 0, &val, sizeof(val));
			break;
		case STEVE_IOC_GET_LOAD_AVG:
			dval = state->max_load_avg;
			fuse_reply_ioctl(req, 0, &dval, sizeof(dval));
			break;
		case STEVE_IOC_GET_MIN_JOBS:
			val = state->min_jobs;
			fuse_reply_ioctl(req, 0, &val, sizeof(val));
			break;
		case STEVE_IOC_GET_LOAD_RECHECK_TIMEOUT:
			dval = state->recheck_timeout.tv_sec + (
				state->recheck_timeout.tv_usec / 1000000.
			);
			fuse_reply_ioctl(req, 0, &dval, sizeof(dval));
			break;
		case STEVE_IOC_SET_JOBS:
			/* 0 means "use the number of online processors" */
			if (val == 0)
				val = sysconf(_SC_NPROCESSORS_ONLN);
			/* adjust the free tokens by the change in job count */
			state->tokens += val - state->jobs;
			state->jobs = val;
			std::print(stderr, "PID {} set jobs to {}\n", fi->fh, state->jobs);
			if (state->verbose)
				std::print(stderr, "  new token availability: {}\n", state->tokens);
			if (state->min_jobs > state->jobs) {
				state->min_jobs = state->jobs;
				if (state->verbose)
					std::print(stderr, "  capping min-jobs to {}\n", state->min_jobs);
			}
			fuse_reply_ioctl(req, 0, nullptr, 0);
			steve_wake_waiters(state);
			break;
		case STEVE_IOC_SET_MIN_JOBS:
			if (static_cast<uint64_t>(val) > state->jobs) {
				fuse_reply_err(req, EINVAL);
				return;
			}
			state->min_jobs = val;
			std::print(stderr, "PID {} set min-jobs to {}\n", fi->fh, state->min_jobs);
			fuse_reply_ioctl(req, 0, nullptr, 0);
			steve_wake_waiters(state);
			break;
		case STEVE_IOC_SET_LOAD_AVG:
			/* written as a negated test, so that NaN is rejected too */
			if (!(dval >= 1)) {
				fuse_reply_err(req, EINVAL);
				return;
			}
			state->max_load_avg = dval;
			std::print(stderr, "PID {} set load-average to {}\n", fi->fh, state->max_load_avg);
			fuse_reply_ioctl(req, 0, nullptr, 0);
			steve_wake_waiters(state);
			break;
		case STEVE_IOC_SET_LOAD_RECHECK_TIMEOUT:
			/* written as a negated test, so that NaN is rejected too */
			if (!(dval >= 0.000001 && dval <= INT_MAX)) {
				fuse_reply_err(req, EINVAL);
				return;
			}
			steve_timeout_to_timeval(&state->recheck_timeout, dval);
			std::print(stderr, "PID {} set load-recheck-timeout to {} s {} us\n",
					fi->fh, state->recheck_timeout.tv_sec, state->recheck_timeout.tv_usec);
			fuse_reply_ioctl(req, 0, nullptr, 0);
			/* TODO: reset the event? */
			break;
		default:
			fuse_reply_err(req, ENOTTY);
			break;
	}
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
/* CUSE operation table: maps device operations to the handlers above.
 * Operations that are not listed here are left unset. */
static const struct cuse_lowlevel_ops steve_ops = {
	.init = steve_init,
	.destroy = steve_destroy,
	.open = steve_open,
	.read = steve_read,
	.write = steve_write,
	.release = steve_release,
	.ioctl = steve_ioctl,
	.poll = steve_poll,
};
#pragma GCC diagnostic pop

/* SIGUSR1 event callback: print the free token count and the number of
 * tokens held by every known process to stderr.
 */
static void steve_handle_sigusr1(evutil_socket_t, short, void *userdata) {
	steve_state *state = static_cast<steve_state *>(userdata);

	std::print(stderr, "steve: currently {} tokens available out of {}\n",
			state->tokens, state->jobs);

	for (auto &[pid, proc] : state->processes)
		std::print(stderr, "PID {} holds {} tokens\n", pid, proc.tokens_held);
}

/* Event callback for the CUSE descriptor: receive one request and process
 * it.  The buffer kept in the state is reused between calls.
 *
 * If the session has been marked as exited, the event loop is stopped;
 * otherwise this persistent event would keep firing on a descriptor that
 * no longer delivers requests.
 */
static void steve_handle_cuse(evutil_socket_t, short, void *userdata) {
	steve_state *state = static_cast<steve_state *>(userdata);

	if (fuse_session_receive_buf(state->session, &state->buf) > 0)
		fuse_session_process_buf(state->session, &state->buf);

	if (fuse_session_exited(state->session)) {
		if (event_base_loopbreak(state->evb) == -1)
			std::print(stderr, "failed to stop the event loop\n");
	}
}

/* Recheck timer callback: lift the load throttle and retry the waiters
 * (which checks the load again if needed).
 */
static void steve_handle_recheck(evutil_socket_t, short, void *userdata) {
	auto *state = static_cast<steve_state *>(userdata);

	/* the timer has fired, so it is no longer pending */
	state->recheck_triggered = false;

	steve_wake_waiters(state);
}

/* Switch to the given user and its primary group.
 *
 * Changes the working directory to "/", clears the supplementary groups,
 * then sets the group and user IDs, in that order.  Stops at the first
 * failure, reporting it on stderr.
 *
 * Returns true if all steps succeeded, false otherwise.
 */
static bool steve_drop_privileges(const char *user) {
	/* getpwnam() returns null both on error and when there is no such
	 * user; errno tells the two apart */
	errno = 0;
	const struct passwd *entry = getpwnam(user);
	if (entry == nullptr) {
		if (errno == 0)
			std::print(stderr, "user {} not found\n", user);
		else
			perror("getpwnam() failed");
		return false;
	}

	if (chdir("/") == -1) {
		perror("chdir('/') failed");
		return false;
	}
	if (setgroups(0, nullptr) == -1) {
		perror("setgroups() failed");
		return false;
	}
	if (setgid(entry->pw_gid) == -1) {
		perror("setgid() failed");
		return false;
	}
	if (setuid(entry->pw_uid) == -1) {
		perror("setuid() failed");
		return false;
	}

	return true;
}

/* Help text.  It is used as a std::print() format string: the only
 * replacement field is filled in with the program name. */
static constexpr char steve_usage[] =
"usage: {} [options]\n"
"\n"
"options:\n"
"    --help, -h             print this help message\n"
"    --version, -V          print version\n"
"    --jobs=JOBS, -j JOBS   jobs to use (default: nproc)\n"
"    --load-average=LOAD_AVG, -l LOAD_AVG\n"
"                           do not serve tokens unless load is below LOAD_AVG\n"
"    --load-recheck-timeout=TIMEOUT, -r TIMEOUT\n"
"                           timeout for throttling due to exceeded load, in sec\n"
"                           (fractional down to usec, default: 0.5)\n"
"    --min-jobs=MIN_JOBS, -m MIN_JOBS\n"
"                           min. jobs to serve even if load average is exceeded\n"
"    --user=USER, -u USER   drop superuser privileges and switch to USER\n"
"                           (and its primary group)\n"
"    --verbose, -v          enable verbose logging\n"
"    --debug, -d            enable FUSE debug output\n";

/* Long options for getopt_long().  Each one maps to the short option of
 * the same meaning; the empty entry terminates the table. */
static const struct option steve_long_opts[] = {
	{ "help", no_argument, 0, 'h' },
	{ "version", no_argument, 0, 'V' },
	{ "jobs", required_argument, 0, 'j' },
	{ "load-average", required_argument, 0, 'l' },
	{ "load-recheck-timeout", required_argument, 0, 'r' },
	{ "min-jobs", required_argument, 0, 'm' },
	{ "user", required_argument, 0, 'u' },
	{ "verbose", no_argument, 0, 'v' },
	{ "debug", no_argument, 0, 'd' },
	{},
};

/* Short options; a colon follows every option that takes an argument. */
static const char *steve_short_opts = "hVj:l:r:m:u:vd";

/* Entry point: parse the command line, set up libevent and the CUSE
 * session, optionally drop privileges, then serve requests until the event
 * loop ends.
 *
 * Returns 0 on success (and for --help / --version), 1 on any error.
 */
int main(int argc, char **argv)
{
	/* The event base is declared before the state, so that it is destroyed
	 * after it: the state owns events that belong to this base, and they
	 * all have to be freed while the base still exists. */
	std::unique_ptr<struct event_base, std::function<void(struct event_base*)>>
		evb{nullptr, event_base_free};
	steve_state state{};

	int opt;
	bool debug = false;
	const char *user = nullptr;
	while ((opt = getopt_long(argc, argv, steve_short_opts, steve_long_opts, nullptr)) != -1) {
		switch (opt) {
			case 'h':
				std::print(steve_usage, argv[0]);
				return 0;
			case 'V':
				std::print("steve {}\n", STEVE_VERSION);
				return 0;
			case 'j':
			case 'm':
				{
					long jobs_arg;
					/* same range as accepted via ioctl; negative values
					 * would wrap around when stored as unsigned */
					if (!arg_to_long(optarg, &jobs_arg) || jobs_arg < 0 || jobs_arg >= INT_MAX) {
						std::print(stderr, "invalid job number: {}\n", optarg);
						return 1;
					}
					if (opt == 'j')
						state.jobs = jobs_arg;
					else if (opt == 'm')
						state.min_jobs = jobs_arg;
					else
						assert(0 && "not reached");
				}
				break;
			case 'l':
				/* negated test, so that NaN is rejected as well */
				if (!arg_to_double(optarg, &state.max_load_avg) || !(state.max_load_avg >= 1)) {
					std::print(stderr, "invalid load average value (must be >=1): {}\n", optarg);
					return 1;
				}
				break;
			case 'r': {
				double timeout;
				/* negated test, so that NaN is rejected as well */
				if (!arg_to_double(optarg, &timeout) || !(timeout >= 0.000001 && timeout <= INT_MAX)) {
					std::print(stderr, "invalid timeout value (must be >=1 us): {}\n", optarg);
					return 1;
				}
				steve_timeout_to_timeval(&state.recheck_timeout, timeout);
				break;
			}
			case 'u':
				user = optarg;
				break;
			case 'v':
				state.verbose = true;
				break;
			case 'd':
				debug = true;
				break;
			default:
				std::print(stderr, steve_usage, argv[0]);
				return 1;
		}
	}

	if (argv[optind]) {
		std::print(stderr, "{}: unexpected positional arguments\n", argv[0]);
		std::print(stderr, steve_usage, argv[0]);
		return 1;
	}

	/* 0 jobs (the default) means "as many as there are online processors" */
	if (state.jobs == 0)
		state.jobs = sysconf(_SC_NPROCESSORS_ONLN);
	if (state.min_jobs > state.jobs) {
		std::print(stderr, "--min-jobs ({}) must not be greater than --jobs ({})\n",
				state.min_jobs, state.jobs);
		return 1;
	}

	evb.reset(event_base_new());
	if (!evb) {
		std::print(stderr, "failed to initialize libevent\n");
		return 1;
	}
	state.evb = evb.get();

	state.recheck_event.reset(
		evtimer_new(state.evb, steve_handle_recheck, &state)
	);
	if (!state.recheck_event) {
		std::print(stderr, "failed to initialize timer recheck event\n");
		return 1;
	}

	/* open the device before dropping privileges */
	int cuse_fd = open("/dev/cuse", O_RDWR);
	if (cuse_fd == -1) {
		perror("unable to open /dev/cuse");
		return 1;
	}
	fd_guard cuse_fd_guard{cuse_fd};

	if (user && !steve_drop_privileges(user))
		return 1;

	const char *dev_name = "DEVNAME=steve";
	const char *dev_info_argv[] = { dev_name };
	struct cuse_info ci{};
	ci.dev_info_argc = 1;
	ci.dev_info_argv = dev_info_argv;

	struct fuse_args args = FUSE_ARGS_INIT(0, nullptr);
	std::unique_ptr<struct fuse_args, std::function<void(struct fuse_args*)>>
		args_ptr{&args, fuse_opt_free_args};
	fuse_opt_add_arg(args_ptr.get(), argv[0]);
	if (debug)
		fuse_opt_add_arg(args_ptr.get(), "-d");

	std::unique_ptr<struct fuse_session, std::function<void(struct fuse_session*)>> session{
		cuse_lowlevel_new(args_ptr.get(), &ci, &steve_ops, &state), fuse_session_destroy};
	if (!session) {
		std::print(stderr, "failed to initialize FUSE\n");
		return 1;
	}
	state.session = session.get();

	std::unique_ptr<struct event, std::function<void(struct event*)>>
		cuse_event{event_new(evb.get(), cuse_fd, EV_READ|EV_PERSIST, steve_handle_cuse, &state), event_free};
	if (!cuse_event) {
		std::print(stderr, "failed to initialize CUSE handler\n");
		return 1;
	}
	if (event_add(cuse_event.get(), nullptr) == -1) {
		std::print(stderr, "failed to enable CUSE handler\n");
		return 1;
	}

	std::unique_ptr<struct event, std::function<void(struct event*)>>
		sigusr1_event{evsignal_new(evb.get(), SIGUSR1, steve_handle_sigusr1, &state), event_free};
	if (!sigusr1_event) {
		std::print(stderr, "failed to initialize SIGUSR1 handler\n");
		return 1;
	}
	if (event_add(sigusr1_event.get(), nullptr) == -1) {
		std::print(stderr, "failed to enable SIGUSR1 handler\n");
		return 1;
	}

	/* the session is attached to the already open CUSE descriptor */
	std::string mountpoint = std::format("/dev/fd/{}", cuse_fd);
	if (fuse_session_mount(session.get(), mountpoint.c_str()) == -1) {
		std::print(stderr, "failed to mount the filesystem\n");
		return 1;
	}

	int loop_result = event_base_dispatch(evb.get());
	fuse_session_unmount(session.get());
	if (loop_result == -1) {
		std::print(stderr, "event loop failed\n");
		return 1;
	}
	return 0;
}
