mirror of
https://github.com/haproxy/haproxy.git
synced 2026-03-28 13:24:28 -04:00
This is the first attempt at moving all internal parts from using struct timeval to integer ticks. These provide simpler and faster code due to simplified operations, and this change also saved about 64 bytes per session. A new header file has been added: include/common/ticks.h. It is possible that some functions should ultimately not be inlined because they're used quite a lot (e.g. tick_first, tick_add_ifset and tick_is_expired). More measurements are required in order to decide whether this is interesting or not. Some function and variable names are still subject to change for better overall logic.
630 lines
18 KiB
C
630 lines
18 KiB
C
/*
 * FD polling functions for Speculative I/O combined with Linux epoll()
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 *
 * This code implements "speculative I/O" under Linux. The principle is to
 * try to perform expected I/O before registering the events in the poller.
 * Each time this succeeds, it saves an expensive epoll_ctl(). It generally
 * succeeds for all reads after an accept(), and for writes after a connect().
 * It also improves performance for streaming connections because even if only
 * one side is polled, the other one may react accordingly depending on the
 * level of the buffer.
 *
 * It presents drawbacks though. If too many events are set for spec I/O,
 * those ones can starve the polled events. Experiments show that when polled
 * events starve, they quickly turn into spec I/O, making the situation even
 * worse. While we can reduce the number of polled events processed at once,
 * we cannot do this on speculative events because most of them are new ones
 * (avg 2/3 new - 1/3 old from experiments).
 *
 * The solution against this problem relies on those two factors :
 *   1) one FD registered as a spec event cannot be polled at the same time
 *   2) even during very high loads, we will almost never be interested in
 *      simultaneous read and write streaming on the same FD.
 *
 * The first point implies that during starvation, we will not have more than
 * half of our FDs in the poll list, otherwise it means there is less than that
 * in the spec list, implying there is no starvation.
 *
 * The second point implies that we're statistically only interested in half of
 * the maximum number of file descriptors at once, because we will unlikely
 * have simultaneous read and writes for a same buffer during long periods.
 *
 * So, if we make it possible to drain maxsock/2/2 during peak loads, then we
 * can ensure that there will be no starvation effect. This means that we must
 * always allocate maxsock/4 events for the poller.
 *
 *
 */

#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>

#include <common/compat.h>
#include <common/config.h>
#include <common/debug.h>
#include <common/standard.h>
#include <common/ticks.h>
#include <common/time.h>
#include <common/tools.h>

#include <types/fd.h>
#include <types/global.h>

#include <proto/fd.h>
#include <proto/task.h>

/* When USE_MY_EPOLL is defined, the epoll definitions come from the project's
 * own <common/epoll.h> and the three epoll entry points are generated with
 * the _syscallN() macros. Otherwise the system's <sys/epoll.h> is used.
 */
#if defined(USE_MY_EPOLL)
#include <common/epoll.h>
#include <errno.h>
#include <sys/syscall.h>
static _syscall1 (int, epoll_create, int, size);
static _syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event);
static _syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout);
#else
#include <sys/epoll.h>
#endif

/*
 * We define 4 states for each direction of a file descriptor, which we store
 * as 2 bits :
 *
 *  00 = IDLE : we're not interested in this event
 *  01 = SPEC : perform speculative I/O on this FD
 *  10 = WAIT : really wait for an availability event on this FD (poll)
 *  11 = STOP : was marked WAIT, but disabled. It can switch back to WAIT if
 *              the application changes its mind, otherwise disable FD polling
 *              and switch back to IDLE.
 *
 * Since we do not want to scan all the FD list to find speculative I/O events,
 * we store them in a list consisting in a linear array holding only the FD
 * indexes right now.
 *
 * The STOP state requires the event to be present in the spec list so that
 * it can be detected and flushed upon next scan without having to scan the
 * whole FD list.
 *
 * This translates like this :
 *
 *   EVENT_IN_SPEC_LIST = 01
 *   EVENT_IN_POLL_LIST = 10
 *
 *   IDLE = 0
 *   SPEC = (EVENT_IN_SPEC_LIST)
 *   WAIT = (EVENT_IN_POLL_LIST)
 *   STOP = (EVENT_IN_SPEC_LIST|EVENT_IN_POLL_LIST)
 *
 * fd_is_set() just consists in checking that the status is 01 or 10.
 *
 * For efficiency reasons, we will store the Read and Write bits interlaced to
 * form a 4-bit field, so that we can simply shift the value right by 0/1 and
 * get what we want :
 *    3  2  1  0
 *   Wp Rp Ws Rs
 *
 * Because of this interlacing, the two bits of a given direction are not
 * adjacent in the stored field: for the read direction the "spec list" bit
 * is bit 0 (value 1) and the "poll list" bit is bit 2 (value 4). The write
 * direction uses the same two bits shifted left by one.
 *
 * The FD array has to hold a back reference to the speculative list. This
 * reference is only valid if at least one of the directions is marked SPEC.
 *
 */

/* per-direction flags, expressed for the read direction (shift count 0) */
#define FD_EV_IN_SL	1
#define FD_EV_IN_PL	4

/* per-direction states, to be compared after shifting and masking with
 * FD_EV_MASK_DIR.
 */
#define FD_EV_IDLE	0
#define FD_EV_SPEC	(FD_EV_IN_SL)
#define FD_EV_WAIT	(FD_EV_IN_PL)
#define FD_EV_STOP	(FD_EV_IN_SL|FD_EV_IN_PL)

/* Those match any of R or W for Spec list or Poll list */
#define FD_EV_RW_SL	(FD_EV_IN_SL | (FD_EV_IN_SL << 1))
#define FD_EV_RW_PL	(FD_EV_IN_PL | (FD_EV_IN_PL << 1))
#define FD_EV_MASK_DIR	(FD_EV_IN_SL|FD_EV_IN_PL)

/* states of the read direction, as stored in the 4-bit field */
#define FD_EV_IDLE_R	0
#define FD_EV_SPEC_R	(FD_EV_IN_SL)
#define FD_EV_WAIT_R	(FD_EV_IN_PL)
#define FD_EV_STOP_R	(FD_EV_IN_SL|FD_EV_IN_PL)
#define FD_EV_MASK_R	(FD_EV_IN_SL|FD_EV_IN_PL)

/* states of the write direction: same as read, one bit to the left */
#define FD_EV_IDLE_W	(FD_EV_IDLE_R << 1)
#define FD_EV_SPEC_W	(FD_EV_SPEC_R << 1)
#define FD_EV_WAIT_W	(FD_EV_WAIT_R << 1)
#define FD_EV_STOP_W	(FD_EV_STOP_R << 1)
#define FD_EV_MASK_W	(FD_EV_MASK_R << 1)

/* all the bits used by both directions */
#define FD_EV_MASK	(FD_EV_MASK_W | FD_EV_MASK_R)

/* This is the minimum number of events successfully processed in speculative
 * mode above which we agree to return without checking epoll() (1/2 times).
 */
#define MIN_RETURN_EVENTS	25

/* Poller-private descriptor of one FD. Both members are bit fields which
 * together use 4 + 28 = 32 bits.
 */
struct fd_status {
	unsigned int e:4;       // read and write events status.
	unsigned int s1:28;     // Position in spec list+1. 0=not in list. Should be last.
};

static int nbspec = 0;          // current size of the spec list
static int absmaxevents = 0;    // absolute maximum amounts of polled events

static struct fd_status *fd_list = NULL;	// list of FDs
static unsigned int *spec_list = NULL;	// speculative I/O list

/* private data */
static struct epoll_event *epoll_events;	// array handed to epoll_wait()
static int epoll_fd;				// epoll instance used by this poller

/* This structure may be used for any purpose. Warning! do not use it in
 * recursive functions !
 */
static struct epoll_event ev;

REGPRM1 static void alloc_spec_entry(const int fd)
|
|
{
|
|
if (fd_list[fd].s1)
|
|
return;
|
|
fd_list[fd].s1 = nbspec + 1;
|
|
spec_list[nbspec] = fd;
|
|
nbspec++;
|
|
}
|
|
|
|
/* Removes entry used by fd <fd> from the spec list and replaces it with the
|
|
* last one. The fd_list is adjusted to match the back reference if needed.
|
|
* If the fd has no entry assigned, return immediately.
|
|
*/
|
|
REGPRM1 static void release_spec_entry(int fd)
|
|
{
|
|
unsigned int pos;
|
|
|
|
pos = fd_list[fd].s1;
|
|
if (!pos)
|
|
return;
|
|
|
|
fd_list[fd].s1 = 0;
|
|
pos--;
|
|
/* we have spec_list[pos]==fd */
|
|
|
|
nbspec--;
|
|
if (pos == nbspec)
|
|
return;
|
|
|
|
/* we replace current FD by the highest one, which may sometimes be the same */
|
|
fd = spec_list[nbspec];
|
|
fd_list[fd].s1 = pos + 1;
|
|
spec_list[pos] = fd;
|
|
}
|
|
|
|
/*
|
|
* Returns non-zero if <fd> is already monitored for events in direction <dir>.
|
|
*/
|
|
REGPRM2 static int __fd_is_set(const int fd, int dir)
|
|
{
|
|
int ret;
|
|
|
|
ret = ((unsigned)fd_list[fd].e >> dir) & FD_EV_MASK_DIR;
|
|
return (ret == FD_EV_SPEC || ret == FD_EV_WAIT);
|
|
}
|
|
|
|
/*
|
|
* Don't worry about the strange constructs in __fd_set/__fd_clr, they are
|
|
* designed like this in order to reduce the number of jumps (verified).
|
|
*/
|
|
REGPRM2 static int __fd_set(const int fd, int dir)
|
|
{
|
|
__label__ switch_state;
|
|
unsigned int i;
|
|
|
|
i = ((unsigned)fd_list[fd].e >> dir) & FD_EV_MASK_DIR;
|
|
|
|
if (i == FD_EV_IDLE) {
|
|
// switch to SPEC state and allocate a SPEC entry.
|
|
alloc_spec_entry(fd);
|
|
switch_state:
|
|
fd_list[fd].e ^= (unsigned int)(FD_EV_IN_SL << dir);
|
|
return 1;
|
|
}
|
|
else if (i == FD_EV_STOP) {
|
|
// switch to WAIT state
|
|
goto switch_state;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
REGPRM2 static int __fd_clr(const int fd, int dir)
|
|
{
|
|
__label__ switch_state;
|
|
unsigned int i;
|
|
|
|
i = ((unsigned)fd_list[fd].e >> dir) & FD_EV_MASK_DIR;
|
|
|
|
if (i == FD_EV_SPEC) {
|
|
// switch to IDLE state
|
|
goto switch_state;
|
|
}
|
|
else if (likely(i == FD_EV_WAIT)) {
|
|
// switch to STOP state
|
|
/* We will create a queue entry for this one because we want to
|
|
* process it later in order to merge it with other events on
|
|
* the same FD.
|
|
*/
|
|
alloc_spec_entry(fd);
|
|
switch_state:
|
|
fd_list[fd].e ^= (unsigned int)(FD_EV_IN_SL << dir);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* normally unused */
|
|
REGPRM1 static void __fd_rem(int fd)
|
|
{
|
|
__fd_clr(fd, DIR_RD);
|
|
__fd_clr(fd, DIR_WR);
|
|
}
|
|
|
|
/*
|
|
* On valid epoll() implementations, a call to close() automatically removes
|
|
* the fds. This means that the FD will appear as previously unset.
|
|
*/
|
|
REGPRM1 static void __fd_clo(int fd)
|
|
{
|
|
if (fd_list[fd].e & FD_EV_RW_SL)
|
|
release_spec_entry(fd);
|
|
fd_list[fd].e &= ~(FD_EV_MASK);
|
|
}
|
|
|
|
/*
|
|
* speculative epoll() poller
|
|
*/
|
|
REGPRM2 static void _do_poll(struct poller *p, int exp)
|
|
{
|
|
static unsigned int last_skipped;
|
|
static unsigned int spec_processed;
|
|
int status, eo;
|
|
int fd, opcode;
|
|
int count;
|
|
int spec_idx;
|
|
int wait_time;
|
|
|
|
|
|
/* Here we have two options :
|
|
* - either walk the list forwards and hope to match more events
|
|
* - or walk it backwards to minimize the number of changes and
|
|
* to make better use of the cache.
|
|
* Tests have shown that walking backwards improves perf by 0.2%.
|
|
*/
|
|
|
|
status = 0;
|
|
spec_idx = nbspec;
|
|
while (likely(spec_idx > 0)) {
|
|
spec_idx--;
|
|
fd = spec_list[spec_idx];
|
|
eo = fd_list[fd].e; /* save old events */
|
|
|
|
/*
|
|
* Process the speculative events.
|
|
*
|
|
* Principle: events which are marked FD_EV_SPEC are processed
|
|
* with their assigned function. If the function returns 0, it
|
|
* means there is nothing doable without polling first. We will
|
|
* then convert the event to a pollable one by assigning them
|
|
* the WAIT status.
|
|
*/
|
|
|
|
fdtab[fd].ev &= FD_POLL_STICKY;
|
|
if ((eo & FD_EV_MASK_R) == FD_EV_SPEC_R) {
|
|
/* The owner is interested in reading from this FD */
|
|
if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) {
|
|
/* Pretend there is something to read */
|
|
fdtab[fd].ev |= FD_POLL_IN;
|
|
if (!fdtab[fd].cb[DIR_RD].f(fd))
|
|
fd_list[fd].e ^= (FD_EV_WAIT_R ^ FD_EV_SPEC_R);
|
|
else
|
|
status++;
|
|
}
|
|
}
|
|
else if ((eo & FD_EV_MASK_R) == FD_EV_STOP_R) {
|
|
/* This FD was being polled and is now being removed. */
|
|
fd_list[fd].e &= ~FD_EV_MASK_R;
|
|
}
|
|
|
|
if ((eo & FD_EV_MASK_W) == FD_EV_SPEC_W) {
|
|
/* The owner is interested in writing to this FD */
|
|
if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) {
|
|
/* Pretend there is something to write */
|
|
fdtab[fd].ev |= FD_POLL_OUT;
|
|
if (!fdtab[fd].cb[DIR_WR].f(fd))
|
|
fd_list[fd].e ^= (FD_EV_WAIT_W ^ FD_EV_SPEC_W);
|
|
else
|
|
status++;
|
|
}
|
|
}
|
|
else if ((eo & FD_EV_MASK_W) == FD_EV_STOP_W) {
|
|
/* This FD was being polled and is now being removed. */
|
|
fd_list[fd].e &= ~FD_EV_MASK_W;
|
|
}
|
|
|
|
/* Now, we will adjust the event in the poll list. Indeed, it
|
|
* is possible that an event which was previously in the poll
|
|
* list now goes out, and the opposite is possible too. We can
|
|
* have opposite changes for READ and WRITE too.
|
|
*/
|
|
|
|
if ((eo ^ fd_list[fd].e) & FD_EV_RW_PL) {
|
|
/* poll status changed*/
|
|
if ((fd_list[fd].e & FD_EV_RW_PL) == 0) {
|
|
/* fd removed from poll list */
|
|
opcode = EPOLL_CTL_DEL;
|
|
}
|
|
else if ((eo & FD_EV_RW_PL) == 0) {
|
|
/* new fd in the poll list */
|
|
opcode = EPOLL_CTL_ADD;
|
|
}
|
|
else {
|
|
/* fd status changed */
|
|
opcode = EPOLL_CTL_MOD;
|
|
}
|
|
|
|
/* construct the epoll events based on new state */
|
|
ev.events = 0;
|
|
if (fd_list[fd].e & FD_EV_WAIT_R)
|
|
ev.events |= EPOLLIN;
|
|
|
|
if (fd_list[fd].e & FD_EV_WAIT_W)
|
|
ev.events |= EPOLLOUT;
|
|
|
|
ev.data.fd = fd;
|
|
epoll_ctl(epoll_fd, opcode, fd, &ev);
|
|
}
|
|
|
|
|
|
if (!(fd_list[fd].e & FD_EV_RW_SL)) {
|
|
/* This fd switched to combinations of either WAIT or
|
|
* IDLE. It must be removed from the spec list.
|
|
*/
|
|
release_spec_entry(fd);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* It may make sense to immediately return here if there are enough
|
|
* processed events, without passing through epoll_wait() because we
|
|
* have exactly done a poll.
|
|
* Measures have shown a great performance increase if we call the
|
|
* epoll_wait() only the second time after speculative accesses have
|
|
* succeeded. This reduces the number of unsucessful calls to
|
|
* epoll_wait() by a factor of about 3, and the total number of calls
|
|
* by about 2.
|
|
* However, when we do that after having processed too many events,
|
|
* events waiting in epoll() starve for too long a time and tend to
|
|
* become themselves eligible for speculative polling. So we try to
|
|
* limit this practise to reasonable situations.
|
|
*/
|
|
|
|
spec_processed += status;
|
|
if (status >= MIN_RETURN_EVENTS && spec_processed < absmaxevents) {
|
|
/* We have processed at least MIN_RETURN_EVENTS, it's worth
|
|
* returning now without checking epoll_wait().
|
|
*/
|
|
if (++last_skipped <= 1) {
|
|
tv_update_date(0, 1);
|
|
return;
|
|
}
|
|
}
|
|
last_skipped = 0;
|
|
|
|
if (nbspec || status || run_queue) {
|
|
/* Maybe we have processed some events that we must report, or
|
|
* maybe we still have events in the spec list, or there are
|
|
* some tasks left pending in the run_queue, so we must not
|
|
* wait in epoll() otherwise we will delay their delivery by
|
|
* the next timeout.
|
|
*/
|
|
wait_time = 0;
|
|
}
|
|
else {
|
|
if (!exp)
|
|
wait_time = MAX_DELAY_MS;
|
|
else if (tick_is_expired(exp, now_ms))
|
|
wait_time = 0;
|
|
else {
|
|
wait_time = TICKS_TO_MS(tick_remain(now_ms, exp)) + 1;
|
|
if (wait_time > MAX_DELAY_MS)
|
|
wait_time = MAX_DELAY_MS;
|
|
}
|
|
}
|
|
|
|
/* now let's wait for real events. We normally use maxpollevents as a
|
|
* high limit, unless <nbspec> is already big, in which case we need
|
|
* to compensate for the high number of events processed there.
|
|
*/
|
|
fd = MIN(absmaxevents, spec_processed);
|
|
fd = MAX(global.tune.maxpollevents, fd);
|
|
fd = MIN(maxfd, fd);
|
|
spec_processed = 0;
|
|
status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
|
|
tv_update_date(wait_time, status);
|
|
|
|
for (count = 0; count < status; count++) {
|
|
int e = epoll_events[count].events;
|
|
fd = epoll_events[count].data.fd;
|
|
|
|
/* it looks complicated but gcc can optimize it away when constants
|
|
* have same values.
|
|
*/
|
|
DPRINTF(stderr, "%s:%d: fd=%d, ev=0x%08x, e=0x%08x\n",
|
|
__FUNCTION__, __LINE__,
|
|
fd, fdtab[fd].ev, e);
|
|
|
|
fdtab[fd].ev &= FD_POLL_STICKY;
|
|
fdtab[fd].ev |=
|
|
((e & EPOLLIN ) ? FD_POLL_IN : 0) |
|
|
((e & EPOLLPRI) ? FD_POLL_PRI : 0) |
|
|
((e & EPOLLOUT) ? FD_POLL_OUT : 0) |
|
|
((e & EPOLLERR) ? FD_POLL_ERR : 0) |
|
|
((e & EPOLLHUP) ? FD_POLL_HUP : 0);
|
|
|
|
if ((fd_list[fd].e & FD_EV_MASK_R) == FD_EV_WAIT_R) {
|
|
if (fdtab[fd].state == FD_STCLOSE || fdtab[fd].state == FD_STERROR)
|
|
continue;
|
|
if (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP|FD_POLL_ERR))
|
|
fdtab[fd].cb[DIR_RD].f(fd);
|
|
}
|
|
|
|
if ((fd_list[fd].e & FD_EV_MASK_W) == FD_EV_WAIT_W) {
|
|
if (fdtab[fd].state == FD_STCLOSE || fdtab[fd].state == FD_STERROR)
|
|
continue;
|
|
if (fdtab[fd].ev & (FD_POLL_OUT|FD_POLL_ERR))
|
|
fdtab[fd].cb[DIR_WR].f(fd);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialization of the speculative epoll() poller.
|
|
* Returns 0 in case of failure, non-zero in case of success. If it fails, it
|
|
* disables the poller by setting its pref to 0.
|
|
*/
|
|
REGPRM1 static int _do_init(struct poller *p)
|
|
{
|
|
__label__ fail_fd_list, fail_spec, fail_ee, fail_fd;
|
|
|
|
p->private = NULL;
|
|
|
|
epoll_fd = epoll_create(global.maxsock + 1);
|
|
if (epoll_fd < 0)
|
|
goto fail_fd;
|
|
|
|
/* See comments at the top of the file about this formula. */
|
|
absmaxevents = MAX(global.tune.maxpollevents, global.maxsock/4);
|
|
epoll_events = (struct epoll_event*)
|
|
calloc(1, sizeof(struct epoll_event) * absmaxevents);
|
|
|
|
if (epoll_events == NULL)
|
|
goto fail_ee;
|
|
|
|
if ((spec_list = (uint32_t *)calloc(1, sizeof(uint32_t) * global.maxsock)) == NULL)
|
|
goto fail_spec;
|
|
|
|
fd_list = (struct fd_status *)calloc(1, sizeof(struct fd_status) * global.maxsock);
|
|
if (fd_list == NULL)
|
|
goto fail_fd_list;
|
|
|
|
return 1;
|
|
|
|
fail_fd_list:
|
|
free(spec_list);
|
|
fail_spec:
|
|
free(epoll_events);
|
|
fail_ee:
|
|
close(epoll_fd);
|
|
epoll_fd = 0;
|
|
fail_fd:
|
|
p->pref = 0;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Termination of the speculative epoll() poller.
|
|
* Memory is released and the poller is marked as unselectable.
|
|
*/
|
|
REGPRM1 static void _do_term(struct poller *p)
|
|
{
|
|
if (fd_list)
|
|
free(fd_list);
|
|
if (spec_list)
|
|
free(spec_list);
|
|
if (epoll_events)
|
|
free(epoll_events);
|
|
|
|
close(epoll_fd);
|
|
epoll_fd = 0;
|
|
|
|
fd_list = NULL;
|
|
spec_list = NULL;
|
|
epoll_events = NULL;
|
|
|
|
p->private = NULL;
|
|
p->pref = 0;
|
|
}
|
|
|
|
/*
|
|
* Check that the poller works.
|
|
* Returns 1 if OK, otherwise 0.
|
|
*/
|
|
REGPRM1 static int _do_test(struct poller *p)
|
|
{
|
|
int fd;
|
|
|
|
fd = epoll_create(global.maxsock + 1);
|
|
if (fd < 0)
|
|
return 0;
|
|
close(fd);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Recreate the epoll file descriptor after a fork(). Returns 1 if OK,
|
|
* otherwise 0. It will ensure that all processes will not share their
|
|
* epoll_fd. Some side effects were encountered because of this, such
|
|
* as epoll_wait() returning an FD which was previously deleted.
|
|
*/
|
|
REGPRM1 static int _do_fork(struct poller *p)
|
|
{
|
|
close(epoll_fd);
|
|
epoll_fd = epoll_create(global.maxsock + 1);
|
|
if (epoll_fd < 0)
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* It is a constructor, which means that it will automatically be called before
|
|
* main(). This is GCC-specific but it works at least since 2.95.
|
|
* Special care must be taken so that it does not need any uninitialized data.
|
|
*/
|
|
__attribute__((constructor))
|
|
static void _do_register(void)
|
|
{
|
|
struct poller *p;
|
|
|
|
if (nbpollers >= MAX_POLLERS)
|
|
return;
|
|
p = &pollers[nbpollers++];
|
|
|
|
p->name = "sepoll";
|
|
p->pref = 400;
|
|
p->private = NULL;
|
|
|
|
p->test = _do_test;
|
|
p->init = _do_init;
|
|
p->term = _do_term;
|
|
p->poll = _do_poll;
|
|
p->fork = _do_fork;
|
|
|
|
p->is_set = __fd_is_set;
|
|
p->cond_s = p->set = __fd_set;
|
|
p->cond_c = p->clr = __fd_clr;
|
|
p->rem = __fd_rem;
|
|
p->clo = __fd_clo;
|
|
}
|
|
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|