.\" Copyright (C) 2000 Internet Software Consortium. .\" .\" Permission to use, copy, modify, and distribute this software for any .\" purpose with or without fee is hereby granted, provided that the above .\" copyright notice and this permission notice appear in all copies. .\" .\" THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM .\" DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL .\" INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, .\" INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING .\" FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, .\" NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION .\" WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" .\" $Id: isc_socket.3,v 1.2 2000/07/27 09:43:12 tale Exp $ .\" .Dd Jun 30, 2000 .Dt ISC_SOCKET 3 .Os BIND9 9 .ds vT BIND9 Programmer's Manual .Sh NAME .Nm isc_socket_create , .Nm isc_socket_attach , .Nm isc_socket_detach , .Nm isc_socketmgr_create , .Nm isc_socketmgr_destroy , .Nm isc_socket_recvv , .Nm isc_socket_recv , .Nm isc_socket_send , .Nm isc_socket_sendto , .Nm isc_socket_sendv , .Nm isc_socket_sendtov , .Nm isc_socket_bind , .Nm isc_socket_listen , .Nm isc_socket_accept , .Nm isc_socket_connect , .Nm isc_socket_getpeername , .Nm isc_socket_getsockname , .Nm isc_socket_cancel , .Nm isc_socket_recvmark , .Nm isc_socket_sendmark , .Nm isc_socket_gettype , .Nm isc_socket_isbound .Nd name server network I/O .Sh SYNOPSIS .Fd #include .Fd .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd #include .Fd .Ft isc_result_t .Fo isc_socket_create .Fa "isc_socketmgr_t *manager" .Fa "int pf" 
.Fa "isc_sockettype_t type" .Fa "isc_socket_t **socketp" .Fc .Ft void .Fo isc_socket_attach .Fa "isc_socket_t *sock" .Fa "isc_socket_t **socketp" .Fc .Ft void .Fo isc_socket_detach .Fa "isc_socket_t **socketp" .Fc .Ft isc_result_t .Fo isc_socketmgr_create .Fa "isc_mem_t *mctx" .Fa "isc_socketmgr_t **managerp" .Fc .Ft void .Fo isc_socketmgr_destroy .Fa "isc_socketmgr_t **managerp" .Fc .Ft isc_result_t .Fo isc_socket_recvv .Fa "isc_socket_t *sock" .Fa "isc_bufferlist_t *buflist" .Fa "unsigned int minimum" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_recv .Fa "isc_socket_t *sock" .Fa "isc_region_t *region" .Fa "unsigned int minimum" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_send .Fa "isc_socket_t *sock" .Fa "isc_region_t *region" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_sendto .Fa "isc_socket_t *sock" .Fa "isc_region_t *region" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fa "isc_sockaddr_t *address" .Fa "struct in6_pktinfo *pktinfo" .Fc .Ft isc_result_t .Fo isc_socket_sendv .Fa "isc_socket_t *sock" .Fa "isc_bufferlist_t *buflist" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_sendtov .Fa "isc_socket_t *sock" .Fa "isc_bufferlist_t *buflist" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fa "isc_sockaddr_t *address" .Fa "struct in6_pktinfo *pktinfo" .Fc .Ft isc_result_t .Fo isc_socket_bind .Fa "isc_socket_t *sock" .Fa "isc_sockaddr_t *sockaddr" .Fc .Ft isc_result_t .Fo isc_socket_listen .Fa "isc_socket_t *sock" .Fa "unsigned int backlog" .Fc .Ft isc_result_t .Fo isc_socket_accept .Fa "isc_socket_t *sock" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_connect .Fa "isc_socket_t *sock" .Fa "isc_sockaddr_t *addr" .Fa "isc_task_t 
*task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_getpeername .Fa "isc_socket_t *sock" .Fa "isc_sockaddr_t *addressp" .Fc .Ft isc_result_t .Fo isc_socket_getsockname .Fa "isc_socket_t *sock" .Fa "isc_sockaddr_t *addressp" .Fc .Ft void .Fo isc_socket_cancel .Fa "isc_socket_t *sock" .Fa "isc_task_t *task" .Fa "unsigned int how" .Fc .Ft isc_result_t .Fo isc_socket_recvmark .Fa "isc_socket_t *sock" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_result_t .Fo isc_socket_sendmark .Fa "isc_socket_t *sock" .Fa "isc_task_t *task" .Fa "isc_taskaction_t action" .Fa "void *arg" .Fc .Ft isc_sockettype_t .Fo isc_socket_gettype .Fa "isc_socket_t *sock" .Fc .Ft isc_boolean_t .Fo isc_socket_isbound .Fa "isc_socket_t *sock" .Fc .Sh DESCRIPTION Applications using the BIND9 library should use the following functions for interfacing with the operating system's networking subsystem. These functions make extensive use of the two data structures below which are abstractions from the .Xr socket 2 interface provided by most .Ux systems. The contents of these structures should not be manipulated directly. They should be altered using the functions described below. .Bd -literal -offset indent struct isc_socket { /* Not locked. */ unsigned int magic; isc_socketmgr_t *manager; isc_mutex_t lock; isc_sockettype_t type; /* Locked by socket lock. */ unsigned int references; int fd; isc_result_t recv_result; isc_result_t send_result; ISC_LIST(isc_socketevent_t) send_list; ISC_LIST(isc_socketevent_t) recv_list; ISC_LIST(isc_socket_newconnev_t) accept_list; isc_socket_connev_t *connect_ev; /* * Internal events. Posted when a descriptor is readable or * writable. These are statically allocated and never freed. * They will be set to non-purgable before use. 
*/ intev_t readable_ev; intev_t writable_ev; isc_sockaddr_t address; /* remote address */ unsigned int pending_recv : 1, pending_send : 1, pending_accept : 1, listener : 1, /* listener socket */ connected : 1, connecting : 1, /* connect pending */ bound : 1; /* bound to local addr */ #ifdef ISC_NET_RECVOVERFLOW unsigned char overflow; /* used for MSG_TRUNC fake */ #endif #ifdef USE_CMSG unsigned char *cmsg; unsigned int cmsglen; #endif }; typedef struct isc_socket isc_socket_t; .Ed .Bd -literal -offset indent struct isc_socketmgr { /* Not locked. */ unsigned int magic; isc_mem_t *mctx; isc_mutex_t lock; /* Locked by manager lock. */ unsigned int nsockets; /* sockets managed */ isc_thread_t watcher; isc_condition_t shutdown_ok; fd_set read_fds; fd_set write_fds; isc_socket_t *fds[FD_SETSIZE]; int fdstate[FD_SETSIZE]; int maxfd; int pipe_fds[2]; }; typedef struct isc_socketmgr isc_socketmgr_t; .Ed .Pp .Fn isc_socket_create creates a new socket of protocol family .Fa pf which is either .Dv PF_INET or .Dv PF_INET6 . The socket manager .Fa manager will be used to control the socket. .Fa manager must be a valid socket manager and .Fa socketp should be a pointer to a NULL pointer. The newly-created socket is returned through .Fa *socketp . .Pp .Fn isc_socket_attach increments the reference count of the socket .Fa sock and attaches .Fa *socketp to socket .Fa sock . .Fa sock must be a pointer to a valid socket. The reference count on a socket is decremented by calling .Fn isc_socket_detach . .Fa *socketp must be a valid socket pointer. When the socket's last reference is removed, its reference count will be zero. The socket will then be destroyed and any resources associated with it will be discarded. .Pp Socket manager structures .Dv "struct isc_socketmgr" are set up and torn down by .Fn isc_socketmgr_create and .Fn isc_socketmgr_destroy respectively. 
Memory from context .Fa mctx is allocated for the structure and .Fa managerp should point to a NULL .Dv isc_socketmgr_t . If .Fn isc_socketmgr_create succeeds, .Fa *managerp will be a pointer to the created .Dv "struct isc_socketmgr" . Calls to .Fn isc_socketmgr_destroy will block until there are no sockets left in the manager referenced by .Fa *managerp . If the caller uses this manager to hold any socket references, it will deadlock itself. Therefore those sockets should be detached by calling .Xr isc_socket_detach 3 before invoking .Fn isc_socketmgr_destroy . .Pp Reading data from a socket is performed by .Fn isc_socket_recvv and .Fn isc_socket_recv . .Fn isc_socket_recv reads data into a single region, .Fa region , while .Fn isc_socket_recvv uses a list of buffers .Fa buflist for storing the data. .Fa minimum indicates how many bytes should be read. A read done event - .Dv ISC_SOCKEVENT_RECVDONE - with the given action .Fa action and argument .Fa arg gets posted to the event queue of task .Fa task . When .Fa minimum is non-zero, the completion event will be posted when at least that number of bytes have been read from the socket .Fa sock . If .Fa minimum is zero, the completion event is posted when the exact number of bytes in .Fa region or .Fa buflist have been read. This only makes sense for TCP sockets where there is a reasonable guarantee that a predictable number of bytes can be received. For UDP sockets, .Fa minimum is always set to 1. The read completes when the requested number of bytes have been received or if either an error or end of input occurs. Buffers or regions passed to .Fn isc_socket_recv or .Fn isc_socket_recvv or any data they refer to should not be modified by the caller until the completion event has been received. When a successful call to .Fn isc_socket_recvv completes, .Fa *buflist will be empty. The list of buffers that store the data which has been read is returned in the done event's .Dv bufferlist . 
.Fa *buflist is unchanged if .Fn isc_socket_recvv returns an error. .Pp A number of assertion checks are performed by .Fn isc_socket_recv and .Fn isc_socket_recvv . For both functions, .Fa sock should be a valid socket and .Fa task should be a valid task. .Fa action has to be a valid action. It must not be NULL. .Fn isc_socket_recv checks that .Fa region is a valid region. .Fn isc_socket_recvv demands that .Fa buflist is not NULL and that there is at least one buffer in .Fa *buflist . .Pp The functions .Fn isc_socket_send , .Fn isc_socket_sendto , .Fn isc_socket_sendv and .Fn isc_socket_sendtov are used to send data to the peer for socket .Fa sock . The data to be sent are held in either the region .Fa region or in multiple buffers referenced through .Fa buflist . The contents of the .Fa region and .Fa buflist structures and their underlying buffers must not be modified by the caller until the completion event has been received. A send done event .Dv ISC_SOCKEVENT_SENDDONE with action .Fa action and argument .Fa arg is queued for task .Fa task when the data have been sent. If .Fa action is NULL, no completion event is posted. .Pp For .Fn isc_socket_sendto and .Fn isc_socket_sendtov , .Fa address is a pointer to the .Dv "struct isc_sockaddr" containing the destination address. If .Fa sock is an IPv6 socket (protocol family .Dv PF_INET6 ) .Fa pktinfo is a pointer to the packet info structure .Dv "struct in6_pktinfo" for the socket. .Pp When .Fn isc_socket_sendv or .Fn isc_socket_sendtov complete successfully, .Fa *buflist will be empty. The done event's .Dv bufferlist will contain the list of the buffers that were used. .Fa *buflist is not altered if an error occurs in .Fn isc_socket_sendv or .Fn isc_socket_sendtov . .Pp These functions perform a number of assertion checks. .Fa task has to be a valid task and .Fa sock must be a valid, bound socket. \fBCONFIRM THIS! 
Recall recent discussion/bug report about OSes that don't allow sending data on unnamed sockets - JR\fP .Fa action must be either NULL or a valid action. .Fn isc_socket_send and .Fn isc_socket_sendto insist that .Fa region is a valid region. .Fn isc_socket_sendv and .Fn isc_socket_sendtov check that .Fa buflist is non-NULL and that there is at least one buffer in .Fa *buflist . .Pp .Fn isc_socket_send is a trivial "wrapper" function to .Fn isc_socket_sendto while .Fn isc_socket_sendv offers a similar service for calls to .Fn isc_socket_sendtov . If a task is shut down while it has any writes pending, the outcome is system-dependent. Data that has not yet been sent may be discarded or successfully written to the socket's peer. .Pp .Fn isc_socket_bind associates a name with socket .Fa sock . The name that is bound to the socket is given by .Fa *sockaddr . .Pp To put a socket into listen mode, .Fn isc_socket_listen is called. .Fa sock must be a valid socket. Once the socket .Fa sock is in listen mode, it can only be used in calls to .Fn isc_socket_accept , .Fn isc_socket_attach and .Fn isc_socket_detach . .Fa backlog has the usual meaning for the .Xr listen 2 system call in .Ux . It specifies the maximum number of pending connections that can be queued waiting for the application to .Xr accept 2 them. If .Fa backlog is zero, a reasonable system default is used, typically .Dv SOMAXCONN . The value of .Fa backlog may be ignored on other operating systems. .Pp .Fn isc_socket_accept is used to queue an accept event when an incoming connection request is made. Task .Fa task gets a .Dv ISC_SOCKEVENT_NEWCONN event with the sender set to socket .Fa sock which was previously put into listen mode by .Fn isc_socket_listen . The new socket structure is attached to task .Fa task . It is made available through the .Dv isc_socket_newconnev_t event type. The function has assertion checks to ensure that .Fa sock is a valid socket and is in listen mode. 
.Pp .Fn isc_socket_connect connects socket .Fa sock to the peer with address .Fa addr . When the connection completes - either on success or if an error occurs - a .Dv ISC_SOCKEVENT_CONNECT event with action .Fa action and argument .Fa arg is posted to the event queue for task .Fa task . The function's assertion checks ensure that .Fa sock is a valid TCP socket and .Fa addr points to a valid .Dv "struct isc_sockaddr" . The checks also make sure that .Fa task and .Fa action are a valid task and action respectively. .Pp .Fn isc_socket_getpeername returns the name of the peer connected to socket .Fa sock and copies it to .Fa addressp . .Fn isc_socket_getsockname gets the name of socket .Fa sock and copies it to .Fa addressp . Both functions have assertion checks to ensure that .Fa sock is a valid socket and that .Fa *addressp is not NULL. .Pp Socket events that have been queued for some task can be cancelled using .Fn isc_socket_cancel . .Fa sock and .Fa task are valid socket and task pointers respectively. .Fa how is a bitmask of the events that are to be cancelled. Possible values for .Fa how are any combination of .Dv ISC_SOCKCANCEL_RECV , .Dv ISC_SOCKCANCEL_SEND , .Dv ISC_SOCKCANCEL_ACCEPT and .Dv ISC_SOCKCANCEL_CONNECT . When an event is cancelled, .Fn isc_socket_cancel attempts to remove it from the task's queue. If this fails, the event is marked as cancelled and the task is expected to clean it up later. A done event with status .Dv ISC_R_CANCELED is posted for each cancelled event and any necessary state information is reset. .Pp .Fn isc_socket_recvmark and .Fn isc_socket_sendmark insert a receive or send marker for socket .Fa sock . The marker gets processed when all I/O requests in the task .Fa task 's queue have been processed. If that queue is empty, the event is posted immediately to that task. Both functions check that .Fa sock , .Fa task and .Fa action are a valid socket, task and action respectively. 
.Pp When the event handler returns, its .Dv result member can sometimes contain useful information. Depending on the marker type, the event's .Dv result member will contain the same error as the last .Fn isc_socket_recv , .Fn isc_socket_send or .Fn isc_socket_sendto if the mark was processed after a fatal error. .Pp .Fn isc_socket_gettype returns the type of socket - UDP or TCP - for .Fa sock , which must be a valid socket. .Fn isc_socket_isbound returns .Er ISC_TRUE or .Er ISC_FALSE depending on whether the socket .Fa sock has been bound to a name or not: in other words if .Fn isc_socket_bind has been successfully invoked on the socket. .Sh RETURN VALUES A successful call to .Fn isc_socket_create returns .Er ISC_R_SUCCESS . .Er ISC_R_NORESOURCES is returned if the operating system was unable to allocate resources for the socket: typically buffers or file descriptors. If .Fn isc_socket_create is unable to allocate memory for the socket, .Er ISC_R_NOMEMORY is returned. Unexpected errors - for instance when setting options on the new socket - return .Er ISC_R_UNEXPECTED . .Pp .Fn isc_socketmgr_create returns .Er ISC_R_SUCCESS on success. The function returns .Er ISC_R_NOMEMORY if the BIND9 library was unable to allocate memory for the socket manager structure. .Er ISC_R_UNEXPECTED is returned if it was not possible to initialise the .Dv "struct isc_socketmgr" . .Pp Successful calls to .Fn isc_socket_recv and .Fn isc_socket_recvv return .Er ISC_R_SUCCESS . They return .Er ISC_R_NOMEMORY if it was not possible to allocate memory for a socket event handler. The handler that is invoked for the .Dv ISC_SOCKEVENT_RECVDONE event when the socket read completes will return .Er ISC_R_SUCCESS on success or .Er ISC_R_UNEXPECTED if it encounters an error. .Pp The functions .Fn isc_socket_sendto , .Fn isc_socket_sendtov , .Fn isc_socket_send and .Fn isc_socket_sendv all return .Er ISC_R_SUCCESS on success. 
If these four functions are unable to allocate memory when setting up an event handler, they return .Er ISC_R_NOMEMORY . The event handler that is invoked when a .Dv ISC_SOCKEVENT_SENDDONE event is posted when socket write completes returns .Er ISC_R_SUCCESS on success. It returns .Er ISC_R_UNEXPECTED if an error occurs. .Pp Successful calls to .Fn isc_socket_bind return .Er ISC_R_SUCCESS . The function returns .Er ISC_R_NOPERM if the requested address needs a privileged port, and the current user does not have sufficient permission to access it. .Er ISC_R_ADDRNOTAVAIL is returned if the specified address is not available. .Fn isc_socket_bind returns .Er ISC_R_ADDRINUSE if the address is already in use and .Er ISC_R_BOUND if .Fa sock had already been bound to an address. .Er ISC_R_UNEXPECTED is returned for any other error conditions that are reported by .Xr bind 2 and an error message printed on .Dv stderr . .Pp .Fn isc_socket_listen returns .Er ISC_R_SUCCESS on success. .Er ISC_R_UNEXPECTED is returned and an error message printed on .Dv stderr if the .Xr listen 2 system call fails. .Pp .Er ISC_R_NOMEMORY is returned by .Fn isc_socket_accept if it was unable to allocate memory for the new socket or the event handler. .Er ISC_R_SUCCESS is returned on success. .Pp .Fn isc_socket_connect returns .Er ISC_R_NOMEMORY if there was a memory allocation problem or .Er ISC_R_UNEXPECTED if the call to .Xr connect 2 fails. .Er ISC_R_SUCCESS is returned otherwise. The event handler returns .Er ISC_R_SUCCESS when the connection attempt completes successfully. It returns .Er ISC_R_TIMEDOUT when the connection attempt times out or .Er ISC_R_CONNREFUSED if the peer .Fa addr refused the connection. An error of .Er ISC_R_NETUNREACH is returned if the peer's network isn't reachable. Other error conditions from .Xr connect 2 return .Er ISC_R_UNEXPECTED . .Pp If the socket .Fa sock is connected, .Fn isc_socket_getpeername returns .Er ISC_R_SUCCESS and .Er ISC_R_NOTCONNECTED otherwise. 
.Fn isc_socket_getsockname normally returns .Er ISC_R_SUCCESS . It returns .Er ISC_R_NOTBOUND if .Fa sock is not bound. .Er ISC_R_UNEXPECTED is returned and an error message logged on .Dv stderr if .Xr getsockname 2 fails. .Pp .Fn isc_socket_recvmark and .Fn isc_socket_sendmark always return .Er ISC_R_SUCCESS unless they were unable to set up an event handler, in which case they return .Er ISC_R_NOMEMORY . .Sh SEE ALSO .Xr socket 2 , .Xr readv 2 , .Xr recv 2 , .Xr writev 2 , .Xr send 2 , .Xr sendto 2 , .Xr bind 2 , .Xr listen 2 , .Xr accept 2 , .Xr connect 2 , .Xr getpeername 2 , .Xr getsockname 2 .